# Deep Belief Networks

In [69]:
import numpy as np

import matplotlib.pyplot as plt

import tensorflow as tf

from keras.datasets import mnist
from keras.layers import Dense, Layer, Flatten
from keras.optimizers import AdamW
from keras.losses import BinaryCrossentropy
from keras.models import Sequential


In [70]:
# Load the MNIST dataset
# Each split is divided by 255.0 and then reshaped to rows of 784 values
# (one flattened image per row). The label arrays are loaded but not modified.
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train = (x_train / 255.0).reshape(-1, 784)
x_test = (x_test / 255.0).reshape(-1, 784)

In [71]:
# Define the RBM layer
class RBM(Layer):
    """Fully connected sigmoid layer used as an RBM-style building block.

    The layer owns a weight matrix ``w`` of shape ``(input_dim, units)`` and a
    bias ``b`` of shape ``(units,)``; its forward pass is
    ``sigmoid(inputs @ w + b)``. No visible bias, sampling step or
    contrastive-divergence update is defined here: both parameters are
    ordinary trainable weights, optimized through whatever loss the enclosing
    model is compiled with.

    Args:
        units: Number of hidden units (size of the output's last axis).
        **kwargs: Standard ``keras.layers.Layer`` keyword arguments such as
            ``name`` or ``dtype``; forwarded unchanged to the base class.
    """

    def __init__(self, units, **kwargs):
        # Forwarding **kwargs keeps the layer usable wherever Keras passes the
        # base-layer arguments (e.g. when rebuilding it from a saved config).
        super(RBM, self).__init__(**kwargs)
        self.units = units

    def build(self, input_shape):
        """Create ``w`` and ``b`` once the size of the input's last axis is known."""
        self.w = self.add_weight(shape=(input_shape[-1], self.units),
                                 initializer='random_normal',
                                 trainable=True)
        self.b = self.add_weight(shape=(self.units,),
                                 initializer='zeros',
                                 trainable=True)

    def call(self, inputs):
        """Return the hidden activations ``sigmoid(inputs @ w + b)``."""
        return tf.nn.sigmoid(tf.matmul(inputs, self.w) + self.b)

    def get_config(self):
        """Return the layer config including ``units`` so it can be re-created."""
        config = super(RBM, self).get_config()
        config.update({"units": self.units})
        return config


In [72]:
# Create the DBN Model
# Layers are appended in order: two stacked RBM layers narrow the
# representation to 256 and then 64 units, and a sigmoid Dense layer widens it
# back to 784 values.
model = Sequential()
model.add(RBM(256))  # First RBM layer
model.add(RBM(64))   # Second RBM layer
# Adding a dense layer to map back to original dimension
model.add(Dense(784, activation='sigmoid'))
model.add(Flatten())

In [73]:
# Binary cross-entropy computed on probabilities (from_logits=False), with no
# label smoothing, reduced over the last axis and averaged over the batch.
_bce_options = {
    "from_logits": False,
    "label_smoothing": 0.0,
    "axis": -1,
    "reduction": "sum_over_batch_size",
}
loss_function = BinaryCrossentropy(**_bce_options)

# AdamW optimizer with a learning rate of 1e-4.
optimizer = AdamW(learning_rate=1e-4)

In [74]:
# Compile and train the model
# The inputs are also passed as the targets, so the network is trained to
# reproduce its own input for 20 epochs.
model.compile(loss=loss_function, optimizer=optimizer)
model.fit(x=x_train, y=x_train, epochs=20)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.src.callbacks.History at 0x2438f0ad940>

In [75]:
# Evaluate reconstruction loss on the held-out images. Training used the inputs
# as targets (fit(x_train, x_train)), so the evaluation targets must be x_test
# too; scoring the 784-wide output against the class labels in y_test does not
# measure reconstruction quality.
results = model.evaluate(x_test, x_test)



In [78]:
# Hand-recorded results: each entry pairs an experiment description with a
# progress line given as a string literal. Nothing is recomputed here.
# NOTE(review): presumably copied from earlier `model.evaluate` runs - confirm
# how these losses were measured before comparing them.
recorded_runs = (
    ("ADAM + MSE, 20 epochs, default lr",
     "313/313 [==============================] - 0s 1ms/step - loss: 27.0481"),
    ("ADAMW + CrossEntropy, 20 epochs, lr=1e-4",
     "313/313 [==============================] - 0s 1ms/step - loss: 28.5931"),
)
for setup, progress_line in recorded_runs:
    print(setup)
    print(progress_line)

ADAM + MSE, 20 epochs, default lr
ADAMW + CrossEntropy, 20 epochs, lr=1e-4


In [77]:
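# Visualize the weights of the first RBM layer (currently disabled: uncomment
# the lines below to draw each of the 256 weight columns as a 28x28 image)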
# weights = model.layers[0].w.numpy().reshape(784, 256)

# fig, axes = plt.subplots(16, 16, figsize=(10, 10))
# for i, ax in enumerate(axes.flat):
#     if i < 256:
#         ax.imshow(weights[:, i].reshape(28, 28), cmap='gray')
#         ax.axis('off')

# plt.suptitle("Visualization of the First RBM Layer Weights in the DBN", fontsize=16)
# plt.show()