In [275]:
import torch
import matplotlib.pyplot as plt
from itertools import product

In [558]:
class Ising:
    
    # 초기화
    def __init__(self, num_row, num_col):
        self.num_row, self.num_col = num_row, num_col
        self.gen_system()
        
    # (r x s) System configuration을 랜덤으로 생성    
    def gen_system(self):
        self.spins = torch.where(torch.rand(self.num_row, self.num_col) < 0.5, torch.ones(1), -torch.ones(1))
    
    # System의 Hamiltonian를 계산
    def Hamiltonian(self):
        H, J = 0., 1.
        for i, j in product(range(self.num_row), range(self.num_col)):
            for i_nhb, j_nhb in self.get_neighbors(i, j):
                H -= J*self.spins[i_nhb, j_nhb]*self.spins[i, j]
        return H/2
            
    # 격자 (i, j)의 neighbors를 리턴
    def get_neighbors(self, i, j):
        assert (i >= 0) and (i <= self.num_row-1) and (j >= 0) and (j <= self.num_col-1)
        nhb = []
        if j != self.num_col - 1: nhb.append([i, j+1])
        if j != 0: nhb.append([i, j-1])
        if i != self.num_row - 1: nhb.append([i+1, j])
        if i != 0: nhb.append([i-1, j])
        return nhb
    
    # n개의 데이터 생성 (Hamiltonian이 낮을수록 생성 확률 높음)
    def gen_train_data(self, num_data):
        self.gen_system()
        H = self.Hamiltonian()
        data = ((1-self.spins.view(-1))/2).unsqueeze(0)
        for _ in range(num_data-1):
            self.gen_system()
            H_new = self.Hamiltonian()
            p = torch.clamp(torch.exp(-(H_new-H)), max=1)
            if torch.rand(1) < p:
                v = ((1-self.spins.view(-1))/2).unsqueeze(0)
                H = H_new
            else:
                v = data[-1].unsqueeze(0)  
            data = torch.cat([data, v])
        return data

In [272]:
# Creating the RBM Architecture (weights, biases)
class RBM():
    
    # Initiate RBM parameters
    def __init__(self, nv, nh):
        self.W = torch.randn(nh, nv)
        self.a = torch.randn(nh)
        self.b = torch.randn(nv)
    
    def Hamiltonian(self, v, h): 
        ah = torch.dot(self.a, h)
        bv = torch.dot(self.b, v)
        hWv = torch.dot(h, torch.mv(self.W, v))
        H = - ah - bv - hWv
        return H
    
    def FreeEnergy(self, v):
        bv = torch.dot(self.b, v)
        Wv = torch.mv(self.W, v)
        F = - bv
        for i in range(Wv.size()[0]):
            F -= torch.log(1 + torch.exp(self.a[i] + Wv[i]))
        return F
    
    # Calculate p(v = D[i]) using Softmax
    def p_v(self, D):
        # Free Energies of each v = D[i]
        F = torch.tensor(D.size()[0])
        for i in range(D.size()[0]):
            F[i] = self.FreeEnergy(D[i])
            
        # p(v = D[i]) = Softmax(-F)[i] = exp(-F[i])/Z
        p_v = F.softmax(- F, dim = 0)
        return p_v
    
    # Calculate Negative Log-Likelihood using log_softmax
    def NLL(self, D):
        # Free Energies of each v = D[i]
        F = torch.zeros(D.size()[0])
        for i in range(D.size()[0]):
            F[i] = self.FreeEnergy(D[i])
            
        # p(v = D[i]) = Softmax(-F)[i] = exp(-F[i])/Z
        LSM = F.log_softmax(- F, dim = 0)
        NLL = - torch.mean(LSM)
        return NLL
    
    def sigmoid_i(self, D, idx):
        a = self.a
        WD_i = torch.mv(self.W, D[idx])
        sigmoid = torch.sigmoid(a + WD_i)
        return sigmoid
    
    def grad_F_i(self, D, idx, param):
        
        grad_F_i = torch.zeros_like(param)
        
        if param == self.W:
            for j in range(grad_F_i.size()[0]):
                for k in range(grad_F_i.zie()[1]):
                    grad_F_i[j,k] = - self.sigmoid_i(D, idx)[j] * D[idx][k]
        
        elif param == self.a:
            for j in range(grad_F_i.size()[0]):
                grad_F_i[j] = - self.sigmoid_i(D, idx)[j]
        
        elif param == self.b:
            for j in range(grad_F_i.size()[0]):
                grad_F_i[j] = - D[idx][j]
        
        return grad_F_i
        
    # Gradients of Negative Log-Likelihood
    def grad_NLL(self, D, param):
        
        grad_NLL = torch.zeros_like(param)
        nData = D.size()[0]
        
        for idx in range(nData):
            grad_NLL += (1 / nData - self.p_v(D)[idx]) * self.grad_F_i(D, idx, param)
        
        return grad_NLL
    
    # Update the RBM parameters
    def update(self, D, learning_rate):

        grad_NLL_w = self.grad_NLL(D, self.W)
        grad_NLL_a = self.grad_NLL(D, self.a)
        grad_NLL_b = self.grad_NLL(D, self.b)
        
        self.W -= learning_rate * grad_NLL_w
        self.a -= learning_rate * grad_NLL_a
        self.b -= learning_rate * grad_NLL_b

In [568]:
ising = Ising(3, 3)
train_data = ising.gen_train_data(10)
train_data

tensor([[1., 0., 1., 1., 1., 0., 0., 0., 1.],
        [0., 0., 1., 0., 0., 1., 1., 1., 1.],
        [0., 0., 1., 0., 0., 1., 1., 1., 1.],
        [0., 0., 1., 0., 0., 1., 1., 1., 1.],
        [0., 1., 1., 0., 0., 0., 0., 0., 0.],
        [0., 1., 1., 0., 0., 0., 0., 0., 0.],
        [0., 1., 1., 0., 0., 0., 0., 0., 0.],
        [0., 1., 1., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 1., 0., 0.],
        [0., 0., 0., 0., 0., 0., 1., 0., 0.]])

In [571]:
rbm = RBM(D.size()[1], nh = 4)

In [573]:
# Train the RBM
num_epoch = 100
#batch_size = 1\
lr = 1e-3  #learning_rate

for epoch in range(0, num_epoch + 1):
        
    if epoch > 0:
        rbm.update(D, lr)
    
    print('epoch {}: W = {}'.format(epoch, rbm.W))
    print('\t a = {}'.format(rbm.a))
    print('\t b = {}'.format(rbm.b))

epoch 0: W = tensor([[ 0.1156,  1.8399, -0.6726,  2.0601, -0.3161, -0.4336, -1.5100, -1.3203,
         -0.4472],
        [-0.7756,  0.3477, -0.3959,  0.5800, -0.6545, -1.4602,  0.6614, -0.1726,
         -0.8117],
        [ 1.1187, -1.2118,  0.0658,  0.9374, -1.2990, -1.7850,  0.0266,  0.1419,
         -1.1713],
        [-0.8976, -1.9839, -1.2400,  2.0431,  0.5771, -1.2451,  0.2496, -1.8348,
         -0.3367]])
	 a = tensor([-0.6742,  0.0440,  1.1494,  0.2319])
	 b = tensor([ 0.6227,  0.0780, -1.0363, -0.7545, -0.4662,  2.0998, -1.5583,  1.3050,
        -1.3130])


IndexError: invalid index of a 0-dim tensor. Use tensor.item() to convert a 0-dim tensor to a Python number