### Denoising Diffusion Implicit Models (DDIM)

https://keras.io/examples/generative/ddim/

In [1]:
import math
import matplotlib.pyplot as plt
import tensorflow as tf
import tensorflow_datasets as tfds

from tensorflow import keras
from keras import layers

2023-03-20 00:48:49.331203: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-03-20 00:48:49.409433: I tensorflow/core/util/port.cc:104] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-03-20 00:48:49.783782: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory
2023-03-20 00:48:49.783846: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not l

In [2]:
# data
# Name of the TFDS dataset handed to tfds.load() in prepare_dataset().
dataset_name = "oxford_flowers102"
# How many times the cached dataset is repeated (Dataset.repeat) before it is
# shuffled and batched in prepare_dataset().
dataset_repetitions = 5
num_epochs = 1  # train for at least 50 epochs for good results
# Side length of the square images produced by preprocess_image(); also
# forwarded to the DiffusionModel constructor.
image_size = 64
# KID = Kernel Inception Distance, see related section
# Both kid_* values are forwarded to the DiffusionModel constructor.
kid_image_size = 75
kid_diffusion_steps = 5
# NOTE(review): not referenced by any cell visible here; presumably the number
# of sampling steps used when plotting generated images -- confirm in ddim.py.
plot_diffusion_steps = 20

# sampling
# Forwarded to DiffusionModel; presumably the bounds of the signal rate used by
# the diffusion schedule -- confirm in ddim.py.
min_signal_rate = 0.02
max_signal_rate = 0.95

# architecture
# Forwarded to DiffusionModel; presumably per-level channel widths and the
# number of blocks per level of the U-Net -- confirm in unet.py.
widths = [32, 64, 96, 128]
block_depth = 2

# optimization
# Used for Dataset.batch() (the shuffle buffer is 10 * batch_size) and also
# forwarded to DiffusionModel.
batch_size = 64
# Forwarded to DiffusionModel; presumably the decay of an exponential moving
# average of the network weights -- TODO confirm in ddim.py.
ema = 0.999
# Passed to the AdamW optimizer in model.compile().
learning_rate = 1e-3
weight_decay = 1e-4

### Oxford Flowers 102 dataset
https://www.tensorflow.org/datasets/catalog/oxford_flowers102

In [3]:
def preprocess_image(data):
    """Turn one dataset record into a square, rescaled training image.

    Args:
        data: mapping with an "image" entry holding the image tensor
            (height and width are read from its first two dimensions).

    Returns:
        The centre-cropped image, resized to image_size x image_size with
        antialiasing, divided by 255 and clipped to the range [0, 1].
    """
    raw = data["image"]
    dims = tf.shape(raw)
    rows, cols = dims[0], dims[1]

    # the largest centred square that fits inside the image
    side = tf.minimum(rows, cols)
    top = (rows - side) // 2
    left = (cols - side) // 2
    square = tf.image.crop_to_bounding_box(raw, top, left, side, side)

    # antialiasing matters when downsampling, otherwise the result aliases
    resized = tf.image.resize(
        square, size=[image_size, image_size], antialias=True
    )
    scaled = resized / 255.0
    return tf.clip_by_value(scaled, 0.0, 1.0)


def prepare_dataset(split):
    """Build the batched input pipeline for the given TFDS split string.

    The pipeline loads `dataset_name`, preprocesses every record with
    preprocess_image, caches the result, repeats it `dataset_repetitions`
    times, shuffles with a buffer of 10 * batch_size, and emits full batches
    of `batch_size` (the incomplete last batch is dropped) with prefetching.

    Shuffling is applied to every split, including validation, because the
    data order matters for the KID estimation.
    """
    pipeline = tfds.load(dataset_name, split=split, shuffle_files=True)
    pipeline = pipeline.map(preprocess_image, num_parallel_calls=tf.data.AUTOTUNE)
    pipeline = pipeline.cache()
    pipeline = pipeline.repeat(dataset_repetitions)
    pipeline = pipeline.shuffle(10 * batch_size)
    pipeline = pipeline.batch(batch_size, drop_remainder=True)
    pipeline = pipeline.prefetch(buffer_size=tf.data.AUTOTUNE)
    return pipeline


# load dataset
# The three TFDS splits (train, validation, test) are pooled and re-divided:
# the first 80% of each goes to training and the remaining 20% of each to
# validation, so the two datasets do not overlap.
train_dataset = prepare_dataset("train[:80%]+validation[:80%]+test[:80%]")
val_dataset = prepare_dataset("train[80%:]+validation[80%:]+test[80%:]")

2023-03-20 00:48:54.088038: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:967] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-03-20 00:48:54.120704: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:967] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-03-20 00:48:54.120739: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:967] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-03-20 00:48:54.121470: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX_VNNI FMA
To enable them in other ope

In [4]:
from ddim import DiffusionModel

In [6]:
# create and compile the model
# NOTE: this cell relies on names defined by earlier cells (the hyperparameter
# constants, train_dataset / val_dataset and the DiffusionModel import). Run
# those cells first in a fresh kernel, otherwise it fails with a NameError.
model = DiffusionModel(
    image_size,
    widths,
    block_depth,
    batch_size,
    ema,
    min_signal_rate,
    max_signal_rate,
    kid_diffusion_steps,
    kid_image_size,
)
# below tensorflow 2.9:
# pip install tensorflow_addons
# import tensorflow_addons as tfa
# optimizer=tfa.optimizers.AdamW
model.compile(
    optimizer=keras.optimizers.experimental.AdamW(
        learning_rate=learning_rate, weight_decay=weight_decay
    ),
    # pixelwise mean absolute error is used as loss
    loss=keras.losses.mean_absolute_error,
)

# save the best model based on the validation KID metric
# (weights only; a checkpoint is written only when "val_kid" reaches a new minimum)
checkpoint_path = "checkpoints/diffusion_model"
# use the same `keras` alias as the rest of this cell instead of `tf.keras`
checkpoint_callback = keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_path,
    save_weights_only=True,
    monitor="val_kid",
    mode="min",
    save_best_only=True,
)

# calculate mean and variance of training dataset for normalization
model.normalizer.adapt(train_dataset)

# run training and plot generated images periodically
# (model.plot_images is invoked at the end of every epoch)
model.fit(
    train_dataset,
    epochs=num_epochs,
    validation_data=val_dataset,
    callbacks=[
        keras.callbacks.LambdaCallback(on_epoch_end=model.plot_images),
        checkpoint_callback,
    ],
)


NameError: name 'image_size' is not defined

In [17]:
print("\u001b[0;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)\nCell \u001b[0;32mIn[11], line 2\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[39m# create and compile the model\u001b[39;00m\n\u001b[0;32m----> 2\u001b[0m model \u001b[39m=\u001b[39m DiffusionModel(\n\u001b[1;32m      3\u001b[0m     image_size,\n\u001b[1;32m      4\u001b[0m     widths,\n\u001b[1;32m      5\u001b[0m     block_depth,\n\u001b[1;32m      6\u001b[0m     batch_size,\n\u001b[1;32m      7\u001b[0m     ema,\n\u001b[1;32m      8\u001b[0m     min_signal_rate,\n\u001b[1;32m      9\u001b[0m     max_signal_rate,\n\u001b[1;32m     10\u001b[0m     kid_diffusion_steps\n\u001b[1;32m     11\u001b[0m )\n\u001b[1;32m     12\u001b[0m \u001b[39m# # below tensorflow 2.9:\u001b[39;00m\n\u001b[1;32m     13\u001b[0m \u001b[39m# # pip install tensorflow_addons\u001b[39;00m\n\u001b[1;32m     14\u001b[0m \u001b[39m# # import tensorflow_addons as tfa\u001b[39;00m\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m     45\u001b[0m \u001b[39m#     ],\u001b[39;00m\n\u001b[1;32m     46\u001b[0m \u001b[39m# )\u001b[39;00m\n\nFile \u001b[0;32m~/proj/diffusion-models/ddim.py:35\u001b[0m, in \u001b[0;36mDiffusionModel.__init__\u001b[0;34m(self, image_size, widths, block_depth, batch_size, ema, min_signal_rate, max_signal_rate, kid_diffusion_steps)\u001b[0m\n\u001b[1;32m     32\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mkid_diffusion_steps \u001b[39m=\u001b[39m kid_diffusion_steps\n\u001b[1;32m     34\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mnormalizer \u001b[39m=\u001b[39m layers\u001b[39m.\u001b[39mNormalization()\n\u001b[0;32m---> 35\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mnetwork \u001b[39m=\u001b[39m unet\u001b[39m.\u001b[39;49mresidual_unet(\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mimage_size, widths, block_depth)\n\u001b[1;32m     36\u001b[0m 
\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mema_network \u001b[39m=\u001b[39m keras\u001b[39m.\u001b[39mmodels\u001b[39m.\u001b[39mclone_model(\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mnetwork)\n\nFile \u001b[0;32m~/proj/diffusion-models/unet.py:70\u001b[0m, in \u001b[0;36mresidual_unet\u001b[0;34m(image_size, widths, block_depth)\u001b[0m\n\u001b[1;32m     67\u001b[0m noisy_images \u001b[39m=\u001b[39m keras\u001b[39m.\u001b[39mInput(shape\u001b[39m=\u001b[39m(image_size, image_size, \u001b[39m3\u001b[39m))\n\u001b[1;32m     68\u001b[0m noise_variances \u001b[39m=\u001b[39m keras\u001b[39m.\u001b[39mInput(shape\u001b[39m=\u001b[39m(\u001b[39m1\u001b[39m, \u001b[39m1\u001b[39m, \u001b[39m1\u001b[39m))\n\u001b[0;32m---> 70\u001b[0m e \u001b[39m=\u001b[39m layers\u001b[39m.\u001b[39;49mLambda(sinusoidal_embedding)(noise_variances)\n\u001b[1;32m     71\u001b[0m e \u001b[39m=\u001b[39m layers\u001b[39m.\u001b[39mUpSampling2D(size\u001b[39m=\u001b[39mimage_size, interpolation\u001b[39m=\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mnearest\u001b[39m\u001b[39m\"\u001b[39m)(e)\n\u001b[1;32m     73\u001b[0m x \u001b[39m=\u001b[39m layers\u001b[39m.\u001b[39mConv2D(widths[\u001b[39m0\u001b[39m], kernel_size\u001b[39m=\u001b[39m\u001b[39m1\u001b[39m)(noisy_images)\n\nFile \u001b[0;32m~/proj/diffusion-models/.env/lib/python3.10/site-packages/keras/utils/traceback_utils.py:70\u001b[0m, in \u001b[0;36mfilter_traceback.<locals>.error_handler\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m     67\u001b[0m     filtered_tb \u001b[39m=\u001b[39m _process_traceback_frames(e\u001b[39m.\u001b[39m__traceback__)\n\u001b[1;32m     68\u001b[0m     \u001b[39m# To get the full stack trace, call:\u001b[39;00m\n\u001b[1;32m     69\u001b[0m     \u001b[39m# `tf.debugging.disable_traceback_filtering()`\u001b[39;00m\n\u001b[0;32m---> 70\u001b[0m     \u001b[39mraise\u001b[39;00m e\u001b[39m.\u001b[39mwith_traceback(filtered_tb) \u001b[39mfrom\u001b[39;00m \u001b[39mNone\u001b[39m\n\u001b[1;32m   
  71\u001b[0m \u001b[39mfinally\u001b[39;00m:\n\u001b[1;32m     72\u001b[0m     \u001b[39mdel\u001b[39;00m filtered_tb\n\nFile \u001b[0;32m~/proj/diffusion-models/unet.py:56\u001b[0m, in \u001b[0;36msinusoidal_embedding\u001b[0;34m(x)\u001b[0m\n\u001b[1;32m     51\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39msinusoidal_embedding\u001b[39m(x):\n\u001b[1;32m     52\u001b[0m     embedding_min_frequency \u001b[39m=\u001b[39m \u001b[39m1.0\u001b[39m\n\u001b[1;32m     53\u001b[0m     frequencies \u001b[39m=\u001b[39m tf\u001b[39m.\u001b[39mexp(\n\u001b[1;32m     54\u001b[0m         tf\u001b[39m.\u001b[39mlinspace(\n\u001b[1;32m     55\u001b[0m             tf\u001b[39m.\u001b[39mmath\u001b[39m.\u001b[39mlog(embedding_min_frequency),\n\u001b[0;32m---> 56\u001b[0m             tf\u001b[39m.\u001b[39mmath\u001b[39m.\u001b[39mlog(embedding_max_frequency),\n\u001b[1;32m     57\u001b[0m             embedding_dims \u001b[39m/\u001b[39m\u001b[39m/\u001b[39m \u001b[39m2\u001b[39m,\n\u001b[1;32m     58\u001b[0m         )\n\u001b[1;32m     59\u001b[0m     )\n\u001b[1;32m     60\u001b[0m     angular_speeds \u001b[39m=\u001b[39m \u001b[39m2.0\u001b[39m \u001b[39m*\u001b[39m math\u001b[39m.\u001b[39mpi \u001b[39m*\u001b[39m frequencies\n\u001b[1;32m     61\u001b[0m     embeddings \u001b[39m=\u001b[39m tf\u001b[39m.\u001b[39mconcat(\n\u001b[1;32m     62\u001b[0m         [tf\u001b[39m.\u001b[39msin(angular_speeds \u001b[39m*\u001b[39m x), tf\u001b[39m.\u001b[39mcos(angular_speeds \u001b[39m*\u001b[39m x)], axis\u001b[39m=\u001b[39m\u001b[39m3\u001b[39m\n\u001b[1;32m     63\u001b[0m     )\n\n\u001b[0;31mNameError\u001b[0m: Exception encountered when calling layer \"lambda_3\" (type Lambda).\n\nname 'embedding_max_frequency' is not defined\n\nCall arguments received by layer \"lambda_3\" (type Lambda):\n  • inputs=tf.Tensor(shape=(None, 1, 1, 1), dtype=float32)\n  • mask=None\n  • training=None")

[0;31m---------------------------------------------------------------------------[0m
[0;31mNameError[0m                                 Traceback (most recent call last)
Cell [0;32mIn[11], line 2[0m
[1;32m      1[0m [39m# create and compile the model[39;00m
[0;32m----> 2[0m model [39m=[39m DiffusionModel(
[1;32m      3[0m     image_size,
[1;32m      4[0m     widths,
[1;32m      5[0m     block_depth,
[1;32m      6[0m     batch_size,
[1;32m      7[0m     ema,
[1;32m      8[0m     min_signal_rate,
[1;32m      9[0m     max_signal_rate,
[1;32m     10[0m     kid_diffusion_steps
[1;32m     11[0m )
[1;32m     12[0m [39m# # below tensorflow 2.9:[39;00m
[1;32m     13[0m [39m# # pip install tensorflow_addons[39;00m
[1;32m     14[0m [39m# # import tensorflow_addons as tfa[39;00m
[0;32m   (...)[0m
[1;32m     45[0m [39m#     ],[39;00m
[1;32m     46[0m [39m# )[39;00m

File [0;32m~/proj/diffusion-models/ddim.py:35[0m, in [0;36mDiffusionModel.__i