In [None]:
from sklearn.neural_network import BernoulliRBM
import numpy as np
from sklearn import linear_model, datasets, metrics
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline

digits = datasets.load_digits()
X = np.asarray(digits.data, 'float32')
Y = digits.target
X = (X - np.min(X, 0)) / (np.max(X, 0) + 0.0001)  # 0-1 scaling

X_train, X_test, Y_train, Y_test = train_test_split(X, Y,
                                                    test_size=0.2,
                                                    random_state=0)

logistic = linear_model.LogisticRegression(C=100)
rbm1 = BernoulliRBM(n_components=100, learning_rate=0.06, n_iter=100, verbose=1, random_state=101)
rbm2 = BernoulliRBM(n_components=80, learning_rate=0.06, n_iter=100, verbose=1, random_state=101)
rbm3 = BernoulliRBM(n_components=60, learning_rate=0.06, n_iter=100, verbose=1, random_state=101)
DBN3 = Pipeline(steps=[('rbm1', rbm1),('rbm2', rbm2), ('rbm3', rbm3), ('logistic', logistic)])

DBN3.fit(X_train, Y_train)

print("Logistic regression using RBM features:\n%s\n" % (
    metrics.classification_report(
        Y_test,
        DBN3.predict(X_test))))

[BernoulliRBM] Iteration 1, pseudo-likelihood = -26.00, time = 0.13s
[BernoulliRBM] Iteration 2, pseudo-likelihood = -25.68, time = 0.29s
[BernoulliRBM] Iteration 3, pseudo-likelihood = -25.18, time = 0.24s
[BernoulliRBM] Iteration 4, pseudo-likelihood = -23.14, time = 0.21s
[BernoulliRBM] Iteration 5, pseudo-likelihood = -21.82, time = 0.25s
[BernoulliRBM] Iteration 6, pseudo-likelihood = -21.74, time = 0.29s
[BernoulliRBM] Iteration 7, pseudo-likelihood = -21.47, time = 0.30s
[BernoulliRBM] Iteration 8, pseudo-likelihood = -21.10, time = 0.20s
[BernoulliRBM] Iteration 9, pseudo-likelihood = -20.80, time = 0.24s
[BernoulliRBM] Iteration 10, pseudo-likelihood = -20.55, time = 0.24s
[BernoulliRBM] Iteration 11, pseudo-likelihood = -20.25, time = 0.25s
[BernoulliRBM] Iteration 12, pseudo-likelihood = -20.30, time = 0.17s
[BernoulliRBM] Iteration 13, pseudo-likelihood = -20.00, time = 0.33s
[BernoulliRBM] Iteration 14, pseudo-likelihood = -20.11, time = 0.29s
[BernoulliRBM] Iteration 15, 

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision.datasets import CIFAR10
from torchvision import transforms
from torch.utils.data import DataLoader
import numpy as np

# Config
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
BATCH_SIZE = 64
EPOCHS = 5

# Transform: Convert to grayscale + normalize
transform = transforms.Compose([
    transforms.Grayscale(),  # Converts RGB -> 1 channel
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

# Load CIFAR10
train_dataset = CIFAR10(root='./data', train=True, transform=transform, download=True)
test_dataset = CIFAR10(root='./data', train=False, transform=transform, download=True)
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

# ======================= RBM ======================= #
class RBM(nn.Module):
    def __init__(self, n_vis, n_hid):
        super(RBM, self).__init__()
        self.W = nn.Parameter(torch.randn(n_hid, n_vis) * 0.01)
        self.v_bias = nn.Parameter(torch.zeros(n_vis))
        self.h_bias = nn.Parameter(torch.zeros(n_hid))

    def sample_h(self, v):
        prob = torch.sigmoid(F.linear(v, self.W, self.h_bias))
        return prob.bernoulli(), prob

    def sample_v(self, h):
        prob = torch.sigmoid(F.linear(h, self.W.t(), self.v_bias))
        return prob.bernoulli(), prob

    def contrastive_divergence(self, v0, k=1, lr=0.1):
        vk = v0
        for _ in range(k):
            hk, _ = self.sample_h(vk)
            vk, _ = self.sample_v(hk)
        h0_prob = torch.sigmoid(F.linear(v0, self.W, self.h_bias))
        hk_prob = torch.sigmoid(F.linear(vk, self.W, self.h_bias))
        self.W.data += lr * (h0_prob.t() @ v0 - hk_prob.t() @ vk) / v0.size(0)
        self.v_bias.data += lr * torch.mean(v0 - vk, dim=0)
        self.h_bias.data += lr * torch.mean(h0_prob - hk_prob, dim=0)

# ======================= DBN ======================= #
class DBN(nn.Module):
    def __init__(self, layers):
        super(DBN, self).__init__()
        self.rbms = nn.ModuleList([RBM(layers[i], layers[i+1]) for i in range(len(layers)-1)])
        self.classifier = nn.Linear(layers[-1], 10)

    def pretrain(self, data_loader, epochs=5):
        input_data = []
        for idx, rbm in enumerate(self.rbms):
            print(f"Pretraining RBM Layer {idx+1}")
            for epoch in range(epochs):
                for x, _ in data_loader:
                    x = x.view(x.size(0), -1).to(device)
                    if input_data:
                        for prev_rbm in input_data:
                          x = torch.sigmoid(F.linear(x, prev_rbm.W, prev_rbm.h_bias))
                    rbm.contrastive_divergence(x)
            # Get the output transformation
            input_data.append(rbm)

    def forward(self, x):
        x = x.view(x.size(0), -1)
        for rbm in self.rbms:
            x = torch.sigmoid(F.linear(x, rbm.W, rbm.h_bias))
        return self.classifier(x)

# =================== Traditional Deep Net =================== #
class DeepNet(nn.Module):
    def __init__(self):
        super(DeepNet, self).__init__()
        self.net = nn.Sequential(
            nn.Linear(1024, 512), nn.ReLU(),
            nn.Linear(512, 256), nn.ReLU(),
            nn.Linear(256, 128), nn.ReLU(),
            nn.Linear(128, 10)
        )

    def forward(self, x):
        x = x.view(x.size(0), -1)
        return self.net(x)

# ==================== Train & Evaluate ==================== #
def train(model, loader, epochs=5):
    model.to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    model.train()
    for epoch in range(epochs):
        total_loss = 0
        for x, y in loader:
            x, y = x.to(device), y.to(device)
            optimizer.zero_grad()
            out = model(x)
            loss = criterion(out, y)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        print(f"Epoch {epoch+1} Loss: {total_loss:.4f}")

def evaluate(model, loader):
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for x, y in loader:
            x, y = x.to(device), y.to(device)
            out = model(x)
            _, pred = torch.max(out, 1)
            correct += (pred == y).sum().item()
            total += y.size(0)
    acc = 100 * correct / total
    print(f"Accuracy: {acc:.2f}%")
    return acc

# ================== Run ================== #
dbn = DBN([1024, 512, 256, 128]).to(device)
dbn.pretrain(train_loader, epochs=3)
train(dbn, train_loader, epochs=5)
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA
import seaborn as sns
import matplotlib.pyplot as plt

def extract_features(model, loader):
    model.eval()
    features = []
    labels = []
    with torch.no_grad():
        for x, y in loader:
            x = x.view(x.size(0), -1).to(device)
            for rbm in model.rbms:
                x = torch.sigmoid(F.linear(x, rbm.W, rbm.h_bias))
            features.append(x.cpu())
            labels.append(y)
    return torch.cat(features), torch.cat(labels)

def plot_tsne(features, labels, title="t-SNE"):
    tsne = TSNE(n_components=2, perplexity=30, init='pca', learning_rate='auto')
    reduced = tsne.fit_transform(features)
    plt.figure(figsize=(10, 6))
    sns.scatterplot(x=reduced[:, 0], y=reduced[:, 1], hue=labels, palette='tab10', s=40, legend='full')
    plt.title(title)
    plt.show()

def plot_pca(features, labels, title="PCA"):
    pca = PCA(n_components=2)
    reduced = pca.fit_transform(features)
    plt.figure(figsize=(10, 6))
    sns.scatterplot(x=reduced[:, 0], y=reduced[:, 1], hue=labels, palette='tab10', s=40, legend='full')
    plt.title(title)
    plt.show()

# ========== Visualize DBN representations ==========
print("\n[Feature Visualization using t-SNE]")
features, labels = extract_features(dbn, test_loader)
plot_tsne(features.numpy(), labels.numpy(), title="DBN Representation (t-SNE)")

print("\n[Feature Visualization using PCA]")
plot_pca(features.numpy(), labels.numpy(), title="DBN Representation (PCA)")

print("\n[DBN Evaluation]")
evaluate(dbn, test_loader)

print("\nTraining DeepNet for comparison...")
deepnet = DeepNet().to(device)
train(deepnet, train_loader, epochs=5)
print("\n[DeepNet Evaluation]")
evaluate(deepnet, test_loader)


Pretraining RBM Layer 1
Pretraining RBM Layer 2
Pretraining RBM Layer 3
Epoch 1 Loss: 1792.5045
Epoch 2 Loss: 1789.1655
Epoch 3 Loss: 1789.3951
Epoch 4 Loss: 1789.2199
Epoch 5 Loss: 1789.2327

[Feature Visualization using t-SNE]


KeyboardInterrupt: 

In [None]:
pip install torch torchvision matplotlib scikit-learn seaborn


Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Downloading nvidia_curand_cu12-10.3.5

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision.datasets import CIFAR10
from torchvision import transforms

# ---------------------------- Config ---------------------------- #
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
BATCH_SIZE = 64
EPOCHS_PRETRAIN = 5
EPOCHS_FINETUNE = 5
INPUT_SIZE = 32 * 32  # grayscale

# --------------------- Load CIFAR-10 (grayscale) --------------------- #
transform = transforms.Compose([
    transforms.Grayscale(),        # Convert RGB to 1 channel
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

train_set = CIFAR10(root='./data', train=True, transform=transform, download=True)
test_set = CIFAR10(root='./data', train=False, transform=transform, download=True)
train_loader = DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(test_set, batch_size=BATCH_SIZE)

# ----------------------------- RBM ----------------------------- #
class RBM(nn.Module):
    def __init__(self, n_vis, n_hid):
        super(RBM, self).__init__()
        self.W = nn.Parameter(torch.randn(n_hid, n_vis) * 0.01)
        self.v_bias = nn.Parameter(torch.zeros(n_vis))
        self.h_bias = nn.Parameter(torch.zeros(n_hid))

    def sample_h(self, v):
        prob = torch.sigmoid(F.linear(v, self.W, self.h_bias))
        return prob.bernoulli(), prob

    def sample_v(self, h):
        prob = torch.sigmoid(F.linear(h, self.W.t(), self.v_bias))
        return prob.bernoulli(), prob

    def contrastive_divergence(self, v0, lr=0.1, k=1):
        vk = v0
        for _ in range(k):
            hk, _ = self.sample_h(vk)
            vk, _ = self.sample_v(hk)
        h0_prob = torch.sigmoid(F.linear(v0, self.W, self.h_bias))
        hk_prob = torch.sigmoid(F.linear(vk, self.W, self.h_bias))

        self.W.data += lr * (h0_prob.t() @ v0 - hk_prob.t() @ vk) / v0.size(0)
        self.v_bias.data += lr * torch.mean(v0 - vk, dim=0)
        self.h_bias.data += lr * torch.mean(h0_prob - hk_prob, dim=0)

# ------------------------- Deep Belief Network ------------------------- #
class DBN(nn.Module):
    def __init__(self, layer_sizes):
        super(DBN, self).__init__()
        self.rbms = nn.ModuleList([RBM(layer_sizes[i], layer_sizes[i+1]) for i in range(len(layer_sizes)-1)])
        self.classifier = nn.Sequential(
            nn.Linear(layer_sizes[-1], 10)
        )

    def pretrain(self, loader, epochs):
        data = None
        for idx, rbm in enumerate(self.rbms):
            print(f"Pretraining RBM Layer {idx+1}")
            for epoch in range(epochs):
                for x, _ in loader:
                    x = x.view(x.size(0), -1).to(device)
                    if data:
                        for prev_rbm in data:
                            x = torch.sigmoid(F.linear(x, prev_rbm.W, prev_rbm.h_bias))
                    rbm.contrastive_divergence(x)
            data = data or []
            data.append(rbm)

    def forward(self, x):
        x = x.view(x.size(0), -1)
        for rbm in self.rbms:
            x = torch.sigmoid(F.linear(x, rbm.W, rbm.h_bias))
        return self.classifier(x)

# --------------------- Training & Evaluation --------------------- #
def train_supervised(model, loader, epochs):
    model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    criterion = nn.CrossEntropyLoss()

    model.train()
    for epoch in range(epochs):
        total_loss = 0
        for x, y in loader:
            x, y = x.to(device), y.to(device)
            optimizer.zero_grad()
            output = model(x)
            loss = criterion(output, y)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        print(f"Epoch {epoch+1}/{epochs} - Loss: {total_loss:.4f}")

def evaluate(model, loader):
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for x, y in loader:
            x, y = x.to(device), y.to(device)
            outputs = model(x)
            _, predicted = torch.max(outputs.data, 1)
            total += y.size(0)
            correct += (predicted == y).sum().item()
    print(f"Accuracy: {100 * correct / total:.2f}%")

# ---------------------- Build and Run ---------------------- #
layer_sizes = [INPUT_SIZE, 512, 256, 128]
dbn = DBN(layer_sizes).to(device)

# Step 1: Pretrain RBMs
dbn.pretrain(train_loader, epochs=EPOCHS_PRETRAIN)

# Step 2: Fine-tune with supervision
train_supervised(dbn, train_loader, epochs=EPOCHS_FINETUNE)

# Step 3: Evaluate
print("\n[Evaluation on Test Set]")
evaluate(dbn, test_loader)


Pretraining RBM Layer 1
Pretraining RBM Layer 2
Pretraining RBM Layer 3
Epoch 1/5 - Loss: 1794.9199
Epoch 2/5 - Loss: 1790.5410
Epoch 3/5 - Loss: 1789.4708
Epoch 4/5 - Loss: 1788.1126
Epoch 5/5 - Loss: 1786.6670

[Evaluation on Test Set]
Accuracy: 13.80%


In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

# -------------------------------- Setup -------------------------------- #
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
BATCH_SIZE = 64
INPUT_SIZE = 32 * 32  # Grayscale image
NUM_CLASSES = 10
EPOCHS_PRETRAIN = 5
EPOCHS_FINETUNE = 5

# -------------------------- Load CIFAR-10 (Grayscale) -------------------------- #
transform = transforms.Compose([
    transforms.Grayscale(), transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))
])
train_data = datasets.CIFAR10(root='./data', train=True, transform=transform, download=True)
test_data = datasets.CIFAR10(root='./data', train=False, transform=transform, download=True)
train_loader = DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(test_data, batch_size=BATCH_SIZE)

# ----------------------------- RBM Definition ----------------------------- #
class RBM(nn.Module):
    def __init__(self, n_vis, n_hid):
        super(RBM, self).__init__()
        self.W = nn.Parameter(torch.randn(n_hid, n_vis) * 0.01)
        self.v_bias = nn.Parameter(torch.zeros(n_vis))
        self.h_bias = nn.Parameter(torch.zeros(n_hid))

    def sample_h(self, v):
        prob = torch.sigmoid(F.linear(v, self.W, self.h_bias))
        return prob.bernoulli(), prob

    def sample_v(self, h):
        prob = torch.sigmoid(F.linear(h, self.W.t(), self.v_bias))
        return prob.bernoulli(), prob

    def contrastive_divergence(self, v0, lr=0.1):
        vk = v0
        for _ in range(1):  # CD-1
            hk, _ = self.sample_h(vk)
            vk, _ = self.sample_v(hk)
        h0 = torch.sigmoid(F.linear(v0, self.W, self.h_bias))
        hk = torch.sigmoid(F.linear(vk, self.W, self.h_bias))
        self.W.data += lr * (h0.t() @ v0 - hk.t() @ vk) / v0.size(0)
        self.v_bias.data += lr * torch.mean(v0 - vk, dim=0)
        self.h_bias.data += lr * torch.mean(h0 - hk, dim=0)

# --------------------------- DBN Model --------------------------- #
class DBN(nn.Module):
    def __init__(self, layer_sizes):
        super(DBN, self).__init__()
        self.rbms = nn.ModuleList([RBM(layer_sizes[i], layer_sizes[i+1]) for i in range(len(layer_sizes)-1)])
        self.classifier = nn.Sequential(nn.Linear(layer_sizes[-1], NUM_CLASSES))

    def pretrain(self, loader, epochs):
        data = []
        for idx, rbm in enumerate(self.rbms):
            print(f"Pretraining RBM Layer {idx+1}")
            for epoch in range(epochs):
                for x, _ in loader:
                    x = x.view(x.size(0), -1).to(device)
                    for prev_rbm in data:
                        x = torch.sigmoid(F.linear(x, prev_rbm.W, prev_rbm.h_bias))
                    rbm.contrastive_divergence(x)
            data.append(rbm)

    def forward(self, x):
        x = x.view(x.size(0), -1)
        for rbm in self.rbms:
            x = torch.sigmoid(F.linear(x, rbm.W, rbm.h_bias))
        return self.classifier(x)

# --------------------------- DNN Model --------------------------- #
class DNN(nn.Module):
    def __init__(self):
        super(DNN, self).__init__()
        self.model = nn.Sequential(
            nn.Flatten(),
            nn.Linear(INPUT_SIZE, 512), nn.ReLU(),
            nn.Linear(512, 256), nn.ReLU(),
            nn.Linear(256, 128), nn.ReLU(),
            nn.Linear(128, NUM_CLASSES)
        )

    def forward(self, x):
        return self.model(x)

# -------------------- Training and Evaluation -------------------- #
def train(model, loader, epochs):
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    loss_fn = nn.CrossEntropyLoss()
    model.to(device)
    model.train()
    for epoch in range(epochs):
        total_loss = 0
        for x, y in loader:
            x, y = x.to(device), y.to(device)
            optimizer.zero_grad()
            pred = model(x)
            loss = loss_fn(pred, y)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        print(f"Epoch {epoch+1}/{epochs}, Loss: {total_loss:.4f}")

def evaluate(model, loader, name="Model"):
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for x, y in loader:
            x, y = x.to(device), y.to(device)
            preds = model(x).argmax(dim=1)
            correct += (preds == y).sum().item()
            total += y.size(0)
    acc = 100 * correct / total
    print(f"[{name}] Accuracy on test set: {acc:.2f}%")
    return acc

# ---------------------------- Run ---------------------------- #
layer_sizes = [INPUT_SIZE, 512, 256, 128]
dbn = DBN(layer_sizes)
dnn = DNN()

# DBN Training
dbn.pretrain(train_loader, epochs=EPOCHS_PRETRAIN)
train(dbn, train_loader, epochs=EPOCHS_FINETUNE)
evaluate(dbn, test_loader, "DBN")

# DNN Training
train(dnn, train_loader, epochs=EPOCHS_FINETUNE + EPOCHS_PRETRAIN)  # Equal total epochs
evaluate(dnn, test_loader, "DNN (baseline)")


Pretraining RBM Layer 1
Pretraining RBM Layer 2
Pretraining RBM Layer 3
Epoch 1/5, Loss: 1794.7881
Epoch 2/5, Loss: 1789.5091
Epoch 3/5, Loss: 1790.2226
Epoch 4/5, Loss: 1790.3169
Epoch 5/5, Loss: 1789.5369
[DBN] Accuracy on test set: 12.94%
Epoch 1/10, Loss: 1463.4042
Epoch 2/10, Loss: 1306.0873
Epoch 3/10, Loss: 1209.6700
Epoch 4/10, Loss: 1140.7519
Epoch 5/10, Loss: 1077.3485
Epoch 6/10, Loss: 1014.1018
Epoch 7/10, Loss: 954.0838
Epoch 8/10, Loss: 897.6212
Epoch 9/10, Loss: 840.0668
Epoch 10/10, Loss: 784.0294
[DNN (baseline)] Accuracy on test set: 45.48%


45.48