In [1]:
%reload_ext autoreload

# Imports

import os
import sys
# Make the project sources importable from this notebook.
# os.path.abspath() already returns a str, so no join()/str() wrapping is needed.
# NOTE(review): the directory is 'image_gen_dm' but the package imported below
# is 'image_gen_vae' — confirm this is the intended location.
module_path = os.path.abspath('../image_gen_dm') # or the path to your source code
# Guard the append so re-running this cell does not stack duplicate entries.
if module_path not in sys.path:
    sys.path.append(module_path)

import tensorflow as tf
import tf_keras as tfk
import tensorflow_probability as tfp
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import random

# Short aliases for the sub-namespaces used throughout the notebook
tfd = tfp.distributions
tfpl = tfp.layers
tfkl = tfk.layers

# Runtime flags for TensorFlow.
# NOTE(review): these are set after `import tensorflow` above; flags that
# TensorFlow reads at import time (possibly TF_ENABLE_ONEDNN_OPTS) may not take
# effect unless this block is moved before the import — confirm.
TF_ENABLE_ONEDNN_OPTS=0
os.environ['TF_ENABLE_ONEDNN_OPTS'] = str(TF_ENABLE_ONEDNN_OPTS)
# Fixed typo: the key was 'TG_GPU_ALLOCATOR', which TensorFlow never reads, so
# the async CUDA allocator was not actually being requested.
os.environ['TF_GPU_ALLOCATOR'] = 'cuda_malloc_async'

import image_gen_vae as igvae
import image_gen_vae.constants as consts

# Report the TensorFlow version and how many GPUs it can see
print(f'Tensorflow Version: {tf.__version__}')
print(f"Available GPUs:  {len(tf.config.list_physical_devices('GPU'))}")

2024-04-29 06:08:29.741047: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Tensorflow Version: 2.16.1
Available GPUs:  1


2024-04-29 06:08:36.764443: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:984] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-04-29 06:08:36.813567: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:984] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-04-29 06:08:36.813627: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:984] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.


In [2]:
# Constants

IMAGE_SIZE = 64

# Dense-layer widths per model configuration (30 entries, indexed like the
# conv-config and flattened-shape tables below):
#   entries  0-13 -> a single 448-unit layer
#   entries 14-27 -> 1792 units followed by 448 units
#   entries 28-29 -> one of each
# Each entry is built as its own list object, exactly as a literal would be.
ENCODER_HIDDEN_SIZES = (
    [[448] for _ in range(14)]
    + [[1792, 448] for _ in range(14)]
    + [[448], [1792, 448]]
)

# Decoder dense widths: the encoder's widths, reversed within each configuration
DECODER_HIDDEN_SIZES = [list(reversed(widths)) for widths in ENCODER_HIDDEN_SIZES]


# Conv stacks, one per model configuration; every layer is a
# (filters, kernel_size, strides) tuple.
# The fourteen stacks below are used twice in a row (entries 0-13 and 14-27).
_BASE_CONV_STACKS = [
    [(16, 3, 2)],
    [(16, 3, 2), (32, 3, 2)],
    [(16, 3, 2), (32, 3, 2), (64, 3, 2)],

    [(32, 5, 4)],
    [(32, 5, 4), (64, 5, 2)],
    [(32, 5, 4), (64, 3, 2), (128, 3, 2)],

    [(32, 3, 2), (64, 3, 2), (128, 3, 2)],
    [(32, 3, 2), (64, 3, 2), (128, 3, 2), (256, 3, 2)],
    [(64, 3, 2), (128, 5, 4), (256, 3, 2)],
    [(64, 3, 2), (128, 3, 2), (256, 3, 2), (512, 3, 2)],

    [(128, 3, 2), (256, 5, 4), (512, 3, 2), (1024, 3, 2)],
    [(128, 3, 2), (256, 3, 2), (512, 3, 2), (1024, 3, 2)],
    [(128, 3, 2), (256, 3, 2), (512, 5, 4), (1024, 3, 2), (2048, 3, 2)],
    [(128, 3, 2), (256, 3, 2), (512, 3, 2), (1024, 3, 2), (2048, 3, 2)],
]

# Six-layer stack shared by the final two entries (28 and 29)
_DEEPEST_CONV_STACK = [(128, 3, 2), (256, 3, 2), (512, 3, 2), (1024, 3, 2), (2048, 3, 2), (4096, 3, 2)]

# list(...) gives every entry its own list object, as separate literals would
ENCODER_CONV_CONFIGS = [
    list(stack)
    for stack in _BASE_CONV_STACKS + _BASE_CONV_STACKS + [_DEEPEST_CONV_STACK] * 2
]

# Decoder stacks: each encoder stack with its layer order reversed
DECODER_CONV_CONFIGS = [stack[::-1] for stack in ENCODER_CONV_CONFIGS]

# Per-configuration 3-D shape that the decoder reshapes its dense output into
# (used by the Reshape layer in the model-building cell). The first fourteen
# shapes repeat for entries 14-27; the last two entries share (1, 1, 4096).
_BASE_FLATTENED_SHAPES = [
    (32, 32, 16),
    (16, 16, 32),
    (8, 8, 64),
    (16, 16, 32),
    (8, 8, 64),
    (4, 4, 128),

    (8, 8, 128),
    (4, 4, 256),
    (4, 4, 256),
    (4, 4, 512),

    (2, 2, 1024),
    (4, 4, 1024),
    (1, 1, 2048),
    (2, 2, 2048),
]

FLATTENED_SHAPES = _BASE_FLATTENED_SHAPES * 2 + [(1, 1, 4096)] * 2

# Element count of each shape above (product of its three dimensions)
FLATTENED_SIZES = [dim_a * dim_b * dim_c for dim_a, dim_b, dim_c in FLATTENED_SHAPES]

LATENT_DIM = 2

# One name per configuration, numbered from 1 and zero-padded to two digits
# (cvae01, cvae02, ..., cvae10, ...)
MODEL_NAMES = [f'cvae{index + 1:02d}' for index in range(len(ENCODER_HIDDEN_SIZES))]

LEARNING_RATE = 1e-4

VAL_PERCENTAGE = 0.1

# Models to run
# MODEL_START is a 0-based index into the configuration tables above;
# MODEL_COUNT consecutive configurations are built starting from it.

MODEL_START = 27 # Rasmus 12-21 Viktor 22-29
MODEL_COUNT = 1

In [3]:
# Dataset loading

# Obtain the training and validation datasets from the project helper.
# NOTE(review): val_percentage presumably sets the fraction held out for
# validation — the split logic lives in igvae.utils.load_datasets, not shown here.
train_ds, val_ds = igvae.utils.load_datasets(val_percentage=VAL_PERCENTAGE)

Loading datasets...


2024-04-29 06:08:36.960899: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:984] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-04-29 06:08:36.961031: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:984] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-04-29 06:08:36.961073: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:984] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-04-29 06:08:37.419492: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:984] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-04-29 06:08:37.419618: I external/local_xla/xla/stream_executor

Training Images:  8100
Evaluation Images:  900
Training Images (post-duplication):  8100
Validation Images (post-duplication):  900


In [4]:
# Model class

class Sampling(tfkl.Layer):
    """Reparameterization layer: draws ``z = z_mean + exp(0.5 * z_log_var) * eps``.

    ``eps`` is standard-normal noise with the same shape and dtype as ``z_mean``.
    """

    def call(self, inputs):
        """Sample a latent tensor.

        Args:
            inputs: pair ``(z_mean, z_log_var)`` of equally shaped tensors.

        Returns:
            A tensor shaped like ``z_mean`` holding one sample per element.
        """
        z_mean, z_log_var = inputs
        # Use core TensorFlow for the noise rather than `tf.keras.backend`, which
        # belongs to a different Keras package than the `tf_keras` layers used
        # here. Matching the input dtype avoids a dtype mismatch if the model is
        # ever run in a non-float32 precision.
        epsilon = tf.random.normal(shape=tf.shape(z_mean), dtype=z_mean.dtype)
        return z_mean + tf.exp(0.5 * z_log_var) * epsilon

class CVAE(tfk.Model):
    """Variational autoencoder built from a separate encoder and decoder model.

    The loss is computed inside ``train_step`` / ``test_step`` (no compiled
    loss is needed): binary cross-entropy reconstruction loss plus the KL
    divergence between the encoder's Gaussian and a standard normal.

    Args:
        encoder: model returning ``(z_mean, z_log_var, z)`` for an input batch.
        decoder: model mapping a latent batch ``z`` to a reconstruction.
        **kwargs: forwarded to ``tfk.Model``.
    """

    def __init__(self, encoder, decoder, **kwargs):
        super().__init__(**kwargs)
        self.encoder = encoder
        self.decoder = decoder
        # Running means reported in the training / validation logs
        self.total_loss_tracker = tfk.metrics.Mean(name="total_loss")
        self.reconstruction_loss_tracker = tfk.metrics.Mean(name="reconstruction_loss")
        self.kl_loss_tracker = tfk.metrics.Mean(name="kl_loss")

    @property
    def metrics(self):
        """Trackers exposed to Keras so it can reset them between epochs."""
        return [
            self.total_loss_tracker,
            self.reconstruction_loss_tracker,
            self.kl_loss_tracker,
        ]

    def _compute_losses(self, data, training):
        """Run a forward pass and return ``(total, reconstruction, kl)`` losses.

        ``data`` is unpacked as an ``(input, target)`` pair.
        """
        input_data, target_data = data
        z_mean, z_log_var, z = self.encoder(input_data, training=training)
        reconstruction = self.decoder(z, training=training)
        # Cross-entropy is reduced over the last axis by the loss function, then
        # summed over axes 1 and 2 and averaged over the batch.
        # `tfk.losses` keeps the loss in the same Keras package as the model.
        reconstruction_loss = tf.reduce_mean(
            tf.reduce_sum(
                tfk.losses.binary_crossentropy(target_data, reconstruction),
                axis=(1, 2)
            )
        )
        kl_loss = -0.5 * (1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var))
        kl_loss = tf.reduce_mean(tf.reduce_sum(kl_loss, axis=1))
        total_loss = reconstruction_loss + kl_loss
        return total_loss, reconstruction_loss, kl_loss

    def _record_losses(self, total_loss, reconstruction_loss, kl_loss):
        """Update the trackers and return the current running means as a dict."""
        self.total_loss_tracker.update_state(total_loss)
        self.reconstruction_loss_tracker.update_state(reconstruction_loss)
        self.kl_loss_tracker.update_state(kl_loss)
        return {
            "loss": self.total_loss_tracker.result(),
            "reconstruction_loss": self.reconstruction_loss_tracker.result(),
            "kl_loss": self.kl_loss_tracker.result(),
        }

    def train_step(self, data):
        """One optimization step on a batch; returns the running loss means."""
        with tf.GradientTape() as tape:
            total_loss, reconstruction_loss, kl_loss = self._compute_losses(data, training=True)

        grads = tape.gradient(total_loss, self.trainable_weights)
        self.optimizer.apply_gradients(zip(grads, self.trainable_weights))
        return self._record_losses(total_loss, reconstruction_loss, kl_loss)

    def test_step(self, data):
        """Evaluate the same losses on a batch without updating weights.

        Added because the model is compiled with an optimizer only; without
        this override the default evaluation path has no loss to compute and
        never updates the trackers above, so validation metrics would carry no
        information.
        """
        total_loss, reconstruction_loss, kl_loss = self._compute_losses(data, training=False)
        return self._record_losses(total_loss, reconstruction_loss, kl_loss)

    def call(self, input_data, training=False):
        """Encode ``input_data``, sample ``z`` and return the decoded reconstruction."""
        # Forward the training flag so any training-dependent layers behave correctly
        _, _, z = self.encoder(input_data, training=training)
        reconstructed_image = self.decoder(z, training=training)

        return reconstructed_image


In [5]:
# Build one compiled CVAE (encoder + decoder) per selected configuration

models = []

for i in range(MODEL_START, MODEL_START + MODEL_COUNT):
    print(f'Model {i+1}')

    # Encoder: conv stack -> flatten -> dense stack -> z_mean / z_log_var -> sampled z

    image_input = tfkl.Input(shape=consts.INPUT_SHAPE, name='image_input')

    x = image_input
    for filters, kernel_size, strides in ENCODER_CONV_CONFIGS[i]:
        x = tfkl.Conv2D(
            filters=filters,
            kernel_size=(kernel_size, kernel_size),
            strides=(strides, strides),
            activation='relu',
            padding='same',
        )(x)

    flattened_image = tfkl.Flatten()(x)

    x = flattened_image
    for layer_size in ENCODER_HIDDEN_SIZES[i]:
        x = tfkl.Dense(
            layer_size,
            activation='relu',
            kernel_initializer='glorot_uniform',
        )(x)

    z_mean = tfkl.Dense(LATENT_DIM, name='z_mean', kernel_initializer='glorot_uniform')(x)
    z_log_var = tfkl.Dense(LATENT_DIM, name='z_log_var', kernel_initializer='glorot_uniform')(x)

    z = Sampling()([z_mean, z_log_var])

    encoder = tfk.Model(inputs=image_input, outputs=[z_mean, z_log_var, z], name='encoder')
    encoder.summary()

    # Decoder: dense stack -> dense to flattened size -> reshape -> transposed-conv stack

    latent_inputs = tfkl.Input(shape=(LATENT_DIM,))

    y = latent_inputs
    for layer_size in DECODER_HIDDEN_SIZES[i]:
        y = tfkl.Dense(
            layer_size,
            activation='relu',
            kernel_initializer='glorot_uniform',
        )(y)

    reconstructed_flattened_image = tfkl.Dense(
        FLATTENED_SIZES[i],
        kernel_initializer='glorot_uniform',
        activation='relu',
    )(y)

    y = tfkl.Reshape(FLATTENED_SHAPES[i])(reconstructed_flattened_image)

    # Each transposed conv takes its kernel/stride from its own entry but its
    # filter count from the NEXT entry; the final layer emits 3 channels with a
    # sigmoid instead.
    upsampling_stack = DECODER_CONV_CONFIGS[i]
    final_layer_index = len(upsampling_stack) - 1
    for j, (_own_filters, kernel_size, strides) in enumerate(upsampling_stack):
        if j < final_layer_index:
            out_filters = upsampling_stack[j + 1][0]
            out_activation = 'relu'
        else:
            out_filters = 3
            out_activation = 'sigmoid'
        y = tfkl.Conv2DTranspose(
            filters=out_filters,
            kernel_size=(kernel_size, kernel_size),
            strides=(strides, strides),
            activation=out_activation,
            padding='same',
        )(y)

    decoder = tfk.Model(inputs=latent_inputs, outputs=y, name='decoder')
    decoder.summary()

    # Model

    model = CVAE(encoder, decoder)
    model.compile(optimizer=tfk.optimizers.Adam(learning_rate=LEARNING_RATE))

    models.append(model)



Model 28
Model: "encoder"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 image_input (InputLayer)    [(None, 64, 64, 3)]          0         []                            
                                                                                                  
 conv2d (Conv2D)             (None, 32, 32, 128)          3584      ['image_input[0][0]']         
                                                                                                  
 conv2d_1 (Conv2D)           (None, 16, 16, 256)          295168    ['conv2d[0][0]']              
                                                                                                  
 conv2d_2 (Conv2D)           (None, 8, 8, 512)            1180160   ['conv2d_1[0][0]']            
                                                                                   

In [6]:
# Run model

epochs = 256

# Create the history directory before training starts, so a missing folder
# cannot make to_csv fail (and lose the history) after a long run.
os.makedirs('res/histories', exist_ok=True)

# `models` was built for consecutive configuration indices starting at
# MODEL_START, so enumerate from there to recover each model's name.
for i, model in enumerate(models, start=MODEL_START):

    history = igvae.run_model(model=model, train_ds=train_ds, val_ds=val_ds, epochs=epochs, name=MODEL_NAMES[i], plot=False)

    # Persist the per-epoch metrics reported by the run
    pd.DataFrame(history.history).to_csv(f'res/histories/{MODEL_NAMES[i]}.csv')


Epoch 1/256


2024-04-29 06:08:56.446016: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:465] Loaded cuDNN version 8906
2024-04-29 06:08:58.563828: W external/local_tsl/tsl/framework/bfc_allocator.cc:296] Allocator (GPU_0_bfc) ran out of memory trying to allocate 2.94GiB with freed_by_count=0. The caller indicates that this is not a failure, but this may mean that there could be performance gains if more memory were available.
I0000 00:00:1714370939.429281      80 service.cc:145] XLA service 0x7fa28a249b00 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1714370939.429459      80 service.cc:153]   StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6
2024-04-29 06:08:59.446390: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
I0000 00:00:1714370939.547678      80 device_compiler.h:188] Compiled cluste



2024-04-29 06:09:09.892853: W external/local_tsl/tsl/framework/bfc_allocator.cc:296] Allocator (GPU_0_bfc) ran out of memory trying to allocate 2.36GiB with freed_by_count=0. The caller indicates that this is not a failure, but this may mean that there could be performance gains if more memory were available.


Epoch 2/256
Epoch 3/256
Epoch 4/256
Epoch 5/256
Epoch 6/256
Epoch 7/256
Epoch 8/256
Epoch 9/256
Epoch 10/256
Epoch 11/256
Epoch 12/256
Epoch 13/256
Epoch 14/256
Epoch 15/256
Epoch 16/256
Epoch 17/256
Epoch 18/256
Epoch 19/256
Epoch 20/256
Epoch 21/256
Epoch 22/256
Epoch 23/256
Epoch 24/256
Epoch 25/256
Epoch 26/256
Epoch 27/256
Epoch 28/256
Epoch 29/256
Epoch 30/256
Epoch 31/256
Epoch 32/256
Epoch 33/256
Epoch 34/256
Epoch 35/256
Epoch 36/256
Epoch 37/256
Epoch 38/256
Epoch 39/256
Epoch 40/256
Epoch 41/256
Epoch 42/256
Epoch 43/256
Epoch 44/256
Epoch 45/256
Epoch 46/256
Epoch 47/256
Epoch 48/256
Epoch 49/256
Epoch 50/256
Epoch 51/256
Epoch 52/256
Epoch 53/256
Epoch 54/256
Epoch 55/256
Epoch 56/256
Epoch 57/256
Epoch 58/256
Epoch 59/256
Epoch 60/256
Epoch 61/256
Epoch 62/256
Epoch 63/256
Epoch 64/256
Epoch 65/256
Epoch 66/256
Epoch 67/256
Epoch 68/256
Epoch 69/256
Epoch 70/256
Epoch 71/256
Epoch 72/256
Epoch 73/256
Epoch 74/256
Epoch 75/256
Epoch 76/256
Epoch 77/256
Epoch 78/256
Epoch 7