In [2]:
import torch
import torch.distributions as dist
import torch.nn as nn
import torch.nn.functional as Fcn
import torch.optim as optim

In [3]:
# Creating Ising spins and Calculating Hamiltonian of the Ising system
class Ising():
    """Random 2-D Ising spin configuration with open (non-periodic) boundaries.

    Attributes:
        probs: (nRow, nCol) tensor of uniform random numbers in [0, 1) from
            which the spins are drawn.
        spins: (nRow, nCol) float tensor; a site is +1 where ``probs < 0.5``
            and -1 otherwise.
    """

    def __init__(self, nRow, nCol):
        """Draw an independent, unbiased +1/-1 spin for every lattice site."""
        self.probs = torch.rand(nRow, nCol)
        up = torch.ones(nRow, nCol)
        self.spins = torch.where(self.probs < 0.5, up, -up)

    def Hamiltonian(self):
        """Return the nearest-neighbour energy H = -J * sum_<ij> s_i * s_j.

        Every bond between vertically or horizontally adjacent sites is
        counted exactly once by multiplying the lattice with a copy of itself
        shifted by one row / one column.  The slices are simply empty when the
        lattice has a single row or column, so such lattices contribute no
        bonds along that axis instead of raising an IndexError.

        Returns:
            0-dim float tensor holding the energy (coupling J = 1).
        """
        J = 1.
        s = self.spins
        vertical_bonds = (s[:-1, :] * s[1:, :]).sum()
        horizontal_bonds = (s[:, :-1] * s[:, 1:]).sum()
        return -J * (vertical_bonds + horizontal_bonds)

In [4]:
# Creating the RBM Architecture (weights, biases)
class RBM():
    """Restricted Boltzmann Machine trained on a batch-normalised likelihood.

    Energy of a joint configuration: H(v, h) = -a.h - b.v - h.W.v

    Attributes:
        W: (nh, nv) weight matrix.
        a: (nh,) hidden-unit biases.
        b: (nv,) visible-unit biases.

    Note: ``p_v`` and ``NLL`` normalise over the rows of the batch they are
    given (a softmax over the batch), not over all possible visible states.
    """

    # Initiate RBM parameters
    def __init__(self, nv, nh):
        """Draw all parameters from a standard normal distribution."""
        self.W = torch.randn(nh, nv)
        self.a = torch.randn(nh)
        self.b = torch.randn(nv)

    def Hamiltonian(self, v, h):
        """Return the energy H(v, h) of one visible/hidden vector pair."""
        ah = torch.dot(self.a, h)
        bv = torch.dot(self.b, v)
        hWv = torch.dot(h, torch.mv(self.W, v))
        return - ah - bv - hWv

    def FreeEnergy(self, v):
        """Return F(v) = -b.v - sum_j log(1 + exp(a_j + (W v)_j)) for one 1-D v.

        ``softplus`` evaluates log(1 + exp(x)) without overflowing for large
        x, where the naive expression returns inf.
        """
        pre_activation = self.a + torch.mv(self.W, v)
        return - torch.dot(self.b, v) - Fcn.softplus(pre_activation).sum()

    def _free_energies(self, Batch):
        """Return the free energy of every row of a (batch, nv) tensor."""
        pre_activation = torch.mm(Batch, self.W.t()) + self.a
        return - torch.mv(Batch, self.b) - Fcn.softplus(pre_activation).sum(dim=1)

    # Calculate p(v = D[i]) using Softmax
    def p_v(self, Batch):
        """Return p(v = Batch[i]) = exp(-F[i]) / sum_k exp(-F[k]) for each row."""
        return Fcn.softmax(-self._free_energies(Batch), dim=0)

    # Calculate Negative Log-Likelihood using log_softmax
    def NLL(self, D):
        """Return the mean negative log of the batch-normalised p(v = D[i])."""
        return - torch.mean(Fcn.log_softmax(-self._free_energies(D), dim=0))

    def sigmoid_i(self, Batch, idx):
        """Return sigmoid(a + W.Batch[idx]), a vector of length nh."""
        return torch.sigmoid(self.a + torch.mv(self.W, Batch[idx]))

    def grad_F_i(self, Batch, idx, param):
        """Return dF(Batch[idx]) / d(param) for param in {'W', 'a', 'b'}.

        Raises:
            ValueError: if ``param`` is not 'W', 'a' or 'b'.
        """
        if param == 'W':
            # Outer product: entry (j, k) is -sigmoid_j * v_k.
            hidden = self.sigmoid_i(Batch, idx)
            return - hidden.unsqueeze(1) * Batch[idx].unsqueeze(0)
        if param == 'a':
            return - self.sigmoid_i(Batch, idx)
        if param == 'b':
            return - Batch[idx]
        raise ValueError("param must be 'W', 'a' or 'b', got {!r}".format(param))

    def _grad_NLL_all(self, Batch):
        """Return {'W', 'a', 'b'} -> gradient of NLL(Batch) for that parameter.

        grad NLL = sum_i (1 / batch_size - p_v[i]) * grad F_i.  The sample
        weights and the hidden sigmoids are computed once for the whole batch.
        """
        batch_size = Batch.size()[0]
        if batch_size == 0:
            # Nothing to sum over: every gradient is zero.
            return {'W': torch.zeros_like(self.W),
                    'a': torch.zeros_like(self.a),
                    'b': torch.zeros_like(self.b)}

        weights = 1.0 / batch_size - self.p_v(Batch)                    # (batch,)
        hidden = torch.sigmoid(torch.mm(Batch, self.W.t()) + self.a)    # (batch, nh)
        weighted_hidden = hidden * weights.unsqueeze(1)                 # (batch, nh)
        return {'W': - torch.mm(weighted_hidden.t(), Batch),
                'a': - weighted_hidden.sum(dim=0),
                'b': - torch.mv(Batch.t(), weights)}

    # Gradients of Negative Log-Likelihood
    def grad_NLL(self, Batch, param):
        """Return the gradient of NLL(Batch) with respect to 'W', 'a' or 'b'.

        Raises:
            ValueError: if ``param`` is not 'W', 'a' or 'b'.
        """
        if param not in ('W', 'a', 'b'):
            raise ValueError("param must be 'W', 'a' or 'b', got {!r}".format(param))
        return self._grad_NLL_all(Batch)[param]

    # Update the RBM parameters
    def update(self, Batch, learning_rate):
        """Take one in-place gradient-descent step on NLL(Batch).

        All three gradients are evaluated at the current parameters before
        any parameter is modified.
        """
        grads = self._grad_NLL_all(Batch)

        self.W -= learning_rate * grads['W']
        self.a -= learning_rate * grads['a']
        self.b -= learning_rate * grads['b']

In [71]:
# Function to create a training data set using the Metropolis algorithm
def Data_for_train(data_size, nRow, nCol):
    """Sample a training set of Ising configurations with a Metropolis chain.

    Every step proposes a fresh random ``Ising(nRow, nCol)`` lattice.  The
    proposal is accepted if its energy is not higher than that of the current
    state, otherwise with probability exp(H_current - H_proposed).  On
    rejection the current state is repeated and its energy is kept, so the
    next proposal is compared against the state actually held by the chain.

    Args:
        data_size: number of samples (rows) to generate.
        nRow, nCol: lattice dimensions.

    Returns:
        (data_size, nRow * nCol) float tensor; each row is a flattened
        lattice encoded as spin +1 -> 0 and spin -1 -> 1.  An empty
        (0, nRow * nCol) tensor is returned when ``data_size`` is 0.
    """
    samples = []
    H = None   # energy of the configuration currently held by the chain

    for i in range(data_size):

        ising = Ising(nRow, nCol)
        H_new = ising.Hamiltonian()

        # Reshape the matrix of Ising spins to a vector as the visible layer
        spin = ising.spins.view(nRow*nCol)
        v = (1 - spin)/2   # spin 1 --> 0 ,   spin -1 --> 1

        if i == 0 or H_new <= H:
            accepted = True
        else:
            accepted = bool(torch.bernoulli(torch.exp(H - H_new)) == 1)

        if accepted:
            samples.append(v)
            H = H_new
        else:
            # Rejected: repeat the current state and keep its energy.
            samples.append(samples[-1])

    if not samples:
        return torch.zeros(0, nRow*nCol)

    # save visible layers as row vectors of the training data matrix
    return torch.stack(samples, dim=0)

In [72]:
def decimal_to_binary_tensor(value, width=0):
    """Return the binary digits of ``value`` as a 1-D float tensor.

    The digits are ordered most-significant first and zero-padded on the left
    to at least ``width`` characters.
    """
    spec = '0' + str(width) + 'b'
    digits = format(value, spec)
    bits = [int(ch != '0') for ch in digits]
    return torch.tensor(bits, dtype=torch.float)

In [73]:
def Entropy_data(Data):
    """Return the Shannon entropy (in nats) of the empirical row distribution.

    Each distinct row of ``Data`` is treated as one state whose probability
    is its relative frequency; the result is -sum_s p(s) * log(p(s)).
    Only the rows that actually occur are counted, so the cost depends on the
    number of rows rather than on the 2**data_length possible binary states.

    Args:
        Data: (data_size, data_length) tensor, one sample per row.
            NOTE(review): rows are assumed to hold only 0/1 entries, as in
            the data sets built in this notebook.

    Returns:
        0-dim float tensor with the entropy (0 for an empty data set).
    """
    data_size = Data.size()[0]
    if data_size == 0:
        return torch.tensor(0.)

    _, count = torch.unique(Data, dim=0, return_counts=True)
    prob = count.to(torch.float) / data_size

    # Every counted state has prob > 0, so log(prob) is always finite.
    return -(prob * torch.log(prob)).sum()

Initialize the data set and the RBM parameters

In [142]:
# Build the training set from a small Ising lattice
nRow, nCol = 3, 3
data_size = 100
data_length = nRow * nCol   # one visible unit per lattice site

D = Data_for_train(data_size, nRow, nCol)

# Entropy of the empirical distribution of the sampled rows
S = Entropy_data(D)
print(S)

tensor(3.9743)


In [143]:
# initiate RBM parameters
# One visible unit per entry of a data row, 10 hidden units
nv, nh = data_length, 10
rbm = RBM(nv, nh)

In [144]:
# Train the RBM
num_epoch = 10
batch_size = 10
learning_rate = 1e-1

# Ceiling division so that a trailing partial batch is trained on as well.
num_batches = (data_size + batch_size - 1) // batch_size

# Epoch 0 reports the loss of the untrained model.  The loss is the NLL on
# the full data set minus the data entropy S.
loss = rbm.NLL(D) - S
print('epoch {}: loss = {}'.format(0, loss))

for epoch in range(1, num_epoch + 1):
    for batch_idx in range(num_batches):
        Batch = D[batch_idx * batch_size : (batch_idx + 1) * batch_size]
        rbm.update(Batch, learning_rate)

    loss_new = rbm.NLL(D) - S
    if (loss - loss_new) < 0.001 and loss_new > S * 0.05:
        # Training stalled at a high loss: restart from fresh random
        # parameters and re-evaluate the loss, so that the value printed
        # below and compared in the next epoch belongs to the new model.
        rbm = RBM(nv, nh)
        loss_new = rbm.NLL(D) - S

    loss = loss_new

    print('epoch {}: loss = {}'.format(epoch, loss))

epoch 0: loss = 4.922558784484863
epoch 1: loss = 2.1298367977142334
epoch 2: loss = 1.1264026165008545
epoch 3: loss = 0.8990771770477295
epoch 4: loss = 0.8246471881866455
epoch 5: loss = 0.791759729385376
epoch 6: loss = 0.7740018367767334
epoch 7: loss = 0.7627289295196533
epoch 8: loss = 0.7545626163482666
epoch 9: loss = 0.7480294704437256
epoch 10: loss = 0.7424390316009521
