# Deep Belief Networks

In [115]:
import torch
import torch.nn as nn
import torch.optim as optim

from torch.nn import functional as F
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

In [116]:
# Pick the compute device: the first CUDA GPU when one is visible, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print("Using device:", device)

Using device: cuda:0


In [117]:
# Define a single layer of the Deep Belief Network (as a Restricted Boltzmann Machine)
class RBM(nn.Module):
    """One layer of the DBN, parameterised like a Restricted Boltzmann Machine.

    Only the deterministic visible-to-hidden pass is implemented; this class
    performs no sampling and has no contrastive-divergence update.

    Args:
        n_vis: Number of visible (input) units.
        n_hid: Number of hidden (output) units.
    """

    def __init__(self, n_vis, n_hid):
        super().__init__()
        # Weight matrix is stored as (n_hid, n_vis), the layout F.linear expects.
        initial_weights = 0.1 * torch.randn(n_hid, n_vis)
        self.W = nn.Parameter(initial_weights)
        # v_bias is registered as a parameter but is not read by forward().
        self.v_bias = nn.Parameter(torch.zeros(n_vis))
        self.h_bias = nn.Parameter(torch.zeros(n_hid))

    def forward(self, v):
        """Return sigmoid(v @ W.T + h_bias): hidden-unit probabilities given ``v``."""
        pre_activation = F.linear(v, self.W, self.h_bias)
        return pre_activation.sigmoid()

In [118]:
# Define the Deep Belief Network
class DBN(nn.Module):
    """Stack of RBM layers followed by a linear read-out passed through a sigmoid.

    Args:
        n_vis: Width of the input fed to the first RBM.
        hidden_layers: Hidden widths, one per RBM, in order. Must be non-empty
            because the read-out layer is sized from its last entry.
        n_out: Width of the output vector.
    """

    def __init__(self, n_vis, hidden_layers, n_out):
        super().__init__()
        # Each RBM consumes the previous layer's width; the first consumes n_vis.
        widths = [n_vis, *hidden_layers]
        self.rbms = nn.ModuleList(
            [RBM(fan_in, fan_out) for fan_in, fan_out in zip(widths, widths[1:])]
        )
        # Read-out layer mapping the top hidden representation to n_out units.
        self.output_layer = nn.Linear(hidden_layers[-1], n_out)

    def forward(self, v):
        """Run ``v`` through every RBM in order, then the sigmoid-squashed read-out."""
        hidden = v
        for layer in self.rbms:
            hidden = layer(hidden)
        return torch.sigmoid(self.output_layer(hidden))

In [119]:
# MNIST loading. Only ToTensor is applied (it scales pixels to [0, 1]); no
# Normalize step is used, so the images stay valid as BCELoss targets in train().
transform = transforms.Compose([transforms.ToTensor()])

# Arguments shared by both splits; only the `train` flag differs.
mnist_kwargs = dict(root='./data', download=True, transform=transform)
train_dataset = datasets.MNIST(train=True, **mnist_kwargs)
test_dataset = datasets.MNIST(train=False, **mnist_kwargs)

# Only the training split is shuffled.
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=64)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=64)

In [120]:
# Input and output widths match (784 = 28 * 28) so the network can be trained
# to reconstruct its own input; hidden widths narrow 500 -> 200 -> 50.
n_pixels = 28 * 28
dbn = DBN(n_vis=n_pixels, hidden_layers=[500, 200, 50], n_out=n_pixels)

In [121]:
# Training loop
def train(dbn, train_loader, epochs, learning_rate, device, n_features=784):
    """Train ``dbn`` to reconstruct its own input using Adam and binary cross-entropy.

    Args:
        dbn: Module mapping a ``(batch, n_features)`` tensor to an output of the
            same shape; it is moved to ``device`` and put in training mode.
        train_loader: Iterable yielding ``(data, label)`` batches. Labels are
            ignored; ``data`` is reshaped to ``(-1, n_features)``.
        epochs: Number of passes over ``train_loader``.
        learning_rate: Learning rate handed to ``torch.optim.Adam``.
        device: Device that the model and every batch are moved to.
        n_features: Flattened size of one sample. Defaults to 784, the value
            that was previously hard-coded.

    Raises:
        ValueError: If a batch contains values outside [0, 1] (NaN included).
            The inputs double as ``nn.BCELoss`` targets, which must lie in
            that range; checking here gives a readable error instead of a
            failure from deep inside the loss kernel.
    """
    dbn.to(device)  # Transfer the model to the target device
    optimizer = torch.optim.Adam(dbn.parameters(), lr=learning_rate)
    loss_function = nn.BCELoss()

    dbn.train()
    for epoch in range(epochs):
        total_loss = 0.0
        num_batches = 0
        for data, _ in train_loader:
            data = data.reshape(-1, n_features)

            # Validate before the transfer so the check does not depend on the
            # device; the comparison form also rejects NaN.
            in_range = (data >= 0) & (data <= 1)
            if not bool(in_range.all()):
                raise ValueError(
                    "train() uses its inputs as BCELoss targets, so every value "
                    "must lie in [0, 1]; got values in "
                    f"[{data.min().item()}, {data.max().item()}]"
                )

            data = data.to(device)
            optimizer.zero_grad()
            output = dbn(data)
            loss = loss_function(output, data)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
            num_batches += 1

        # Average over the batches actually seen: works for loaders without
        # __len__ and avoids dividing by zero when the loader is empty.
        mean_loss = total_loss / num_batches if num_batches else float("nan")
        print(f'Epoch {epoch}, Loss: {mean_loss}')

In [122]:
# Fit the DBN: 10 epochs of Adam with learning rate 1e-3 on the selected device.
# NOTE(review): the saved output of this cell is a CUDA device-side assert.
# nn.BCELoss needs targets in [0, 1], and a CUDA assert presumably stays raised
# until the kernel is restarted -- restart before re-running; TODO confirm cause.
train(
    dbn,
    train_loader,
    epochs=10,
    learning_rate=1e-3,
    device=device,
)

RuntimeError: CUDA error: device-side assert triggered
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.
