<a href="https://colab.research.google.com/github/shpcode/Restricted-Boltzmann-Machine-RBM-/blob/main/ais_rbm.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
import numpy as np
# This class estimates log(Z) of an RBM model by Annealed Importance Sampling (AIS).

''' In AIS we use a chain of distributions without knowing their normalization Z.
    Consider the unnormalized distribution  p(v,beta) = exp(-E(v,beta))
    -E(v,beta) = (1-beta) B*v + beta ( b*v + c*h + sum W*v*h )
    The first term of the energy generates the trivial distribution and is dominant at beta=0.
    The second term is the true rbm distribution and is dominant at beta=1.
    AIS anneals from beta=0 to beta=1 to produce the chain of distributions.
'''

class AIS_rbm:
    """Estimate log(Z) of a binary RBM with Annealed Importance Sampling (AIS).

    The chain of unnormalized intermediate distributions over the visible
    units is, with the hidden units summed out,

        log p*(v, beta) = (1 - beta) * B.v + beta * b.v
                          + sum_j log(1 + exp(beta * (v.W + c)_j))

    At beta = 0 this is the trivial (factorized) base distribution with
    visible bias ``B`` whose log partition function ``log_z_0`` is known in
    closed form; at beta = 1 it is the RBM itself.
    """

    def __init__(self, v_dim, h_dim, the_rbm_model, visible_average_activity):
        """Store the RBM parameters and build the base distribution.

        Args:
            v_dim: number of visible units.
            h_dim: number of hidden units.
            the_rbm_model: tuple ``(w, b, c)`` of the RBM whose log(Z) is
                estimated; ``w`` is the weight matrix of shape
                ``(v_dim, h_dim)``, ``b`` the visible bias of shape
                ``(1, v_dim)`` and ``c`` the hidden bias of shape
                ``(1, h_dim)``. 1-D biases are accepted and promoted to rows.
            visible_average_activity: mean activation of every visible unit
                (e.g. the per-pixel mean over the MNIST training set), shape
                ``(1, v_dim)``, values in ``[0, 1]``. It defines the base
                distribution the sampling starts from.

        Raises:
            ValueError: if an average activity lies outside ``[0, 1]``.
        """
        w, b, c = the_rbm_model
        self.w = np.asarray(w, dtype=np.float64)
        # Biases are kept as row vectors so they broadcast over the sample axis.
        self.b = np.atleast_2d(np.asarray(b, dtype=np.float64))
        self.c = np.atleast_2d(np.asarray(c, dtype=np.float64))
        self.N_v = v_dim
        self.N_h = h_dim

        self.v_ave = np.atleast_2d(
            np.asarray(visible_average_activity, dtype=np.float64)
        )
        if np.any((self.v_ave < 0) | (self.v_ave > 1)):
            raise ValueError(
                "visible_average_activity must contain probabilities in [0, 1]"
            )
        self.B, self.log_z_0 = self.trivial_dist()

    @staticmethod
    def _sigmoid(x):
        """Logistic function written so that exp() never overflows."""
        return np.exp(-np.logaddexp(0, -x))

    def trivial_dist(self):
        """Build the base distribution from the average visible activity.

        Returns:
            ``(B, log_z_0)``: ``B`` is the visible bias (logit of the average
            activity) and ``log_z_0`` the exact log partition function of the
            base distribution, ``N_h * log(2) + sum_i log(1 + exp(B_i))``.
        """
        # An activity of exactly 0 or 1 has an infinite logit; the division /
        # log warnings are expected there and handled just below.
        with np.errstate(divide="ignore"):
            B = -np.log(1 / self.v_ave - 1)
        # Saturated units get a large finite bias so log_z_0 stays finite.
        B[np.isneginf(B)] = -10
        B[np.isposinf(B)] = 10
        log_z_0 = self.N_h * np.log(2) + np.logaddexp(0, B).sum()
        return B, log_z_0

    def log_p_v(self, v, beta):
        """Return the unnormalized log-probability log p*(v, beta).

        AIS needs ratios of consecutive intermediate distributions; working
        with their logarithms is easier and numerically safer.

        Args:
            v: visible configurations, shape ``(num_samples, v_dim)``.
            beta: inverse temperature in ``[0, 1]``.

        Returns:
            Array of shape ``(num_samples, 1)``.
        """
        # The hidden bias broadcasts over samples; nothing is squeezed, so the
        # (num_samples, h_dim) layout also holds for a single hidden unit.
        hidden_input = beta * (np.matmul(v, self.w) + self.c)
        visible_term = (
            (1 - beta) * np.matmul(v, self.B.T) + beta * np.matmul(v, self.b.T)
        )
        # logaddexp(0, x) == log(1 + exp(x)) without overflowing for large x.
        return np.logaddexp(0, hidden_input).sum(axis=1, keepdims=True) + visible_term

    def sample_v_given_h(self, h, beta):
        """Draw binary visible units from p(v | h) at inverse temperature beta."""
        field = beta * (np.matmul(h, self.w.T) + self.b) + (1 - beta) * self.B
        return np.random.binomial(1, self._sigmoid(field))

    def sample_h_given_v(self, v, beta):
        """Draw binary hidden units from p(h | v) at inverse temperature beta."""
        field = beta * (np.matmul(v, self.w) + self.c)
        return np.random.binomial(1, self._sigmoid(field))

    def log_sum_exp(self, x, axis=0):
        """Return log(sum(exp(x))) along ``axis`` (0 or 1), squeezed."""
        # Shift so the largest term becomes sqrt(float64 max): the sum cannot
        # overflow while much smaller terms are kept from underflowing.
        shift = x.max(axis) - np.log(np.finfo(np.float64).max) / 2.0
        if axis == 1:
            return np.squeeze(shift + np.log(np.sum(np.exp(x.T - shift), axis=0)))
        return np.squeeze(shift + np.log(np.sum(np.exp(x - shift), axis=0)))

    def log_z(self, num_samples, anneal_steps, k):
        """Run the annealing schedule and return the AIS estimate of log(Z).

        Args:
            num_samples: number of independent AIS chains.
            anneal_steps: number of beta values on the linear grid from 0 to 1
                (both end points included), at least 2.
            k: number of Gibbs sweeps (h then v) performed at every
                intermediate beta.

        Returns:
            The estimate ``log(mean(importance weights)) + log_z_0``.

        Raises:
            ValueError: if ``num_samples < 1`` or ``anneal_steps < 2``.
        """
        if num_samples < 1:
            raise ValueError("num_samples must be at least 1")
        if anneal_steps < 2:
            # A single grid point never reaches beta = 1, so the result would
            # just be the base log_z_0 rather than an estimate for the RBM.
            raise ValueError("anneal_steps must be at least 2")

        self.num_samples = num_samples
        # Plain float64 betas: an extended-precision scalar would upcast every
        # intermediate array under NumPy 2 promotion rules without improving
        # the estimate, and the binomial sampler works in double precision.
        betas = np.linspace(0.0, 1.0, anneal_steps)

        # First sample of v comes from the trivial distribution (beta = 0).
        h = np.zeros((num_samples, self.N_h))
        v = self.sample_v_given_h(h, betas[0])
        log_ratio = -self.log_p_v(v, betas[0])

        # Accumulate log p_k(v_{k-1}) - log p_k(v_k) over the inner betas.
        for beta in betas[1:-1]:
            log_ratio += self.log_p_v(v, beta)
            for _ in range(k):
                h = self.sample_h_given_v(v, beta)
                v = self.sample_v_given_h(h, beta)
            log_ratio -= self.log_p_v(v, beta)

        log_ratio += self.log_p_v(v, betas[-1])

        # log of the mean importance weight, plus the known base log(Z_0).
        return self.log_sum_exp(log_ratio, axis=0) - np.log(num_samples) + self.log_z_0


In [4]:
''' How to use AIS_rbm
>> read the initial parameters documented inside AIS_rbm
>> initialize AIS_rbm with the current rbm parameters whenever computing log_z is desired during training
>>> then call the method log_z to estimate log_z
    the method log_z takes three inputs
1. num_samples is the number of samples (independent AIS runs). set it to 50
2. anneal_steps is the number of steps used to anneal beta from 0 to 1. normally 1000 steps; 100 also works
3. k is the number of Gibbs steps per annealing step. this is different from cd_k used when training the rbm. set it to 1 '''

# # so each time you want to compute log_z, call the following
# estimate = AIS_rbm(v_dim = 784 , h_dim =16,the_rbm_model=(w,b,c),visible_average_activity=v_m)
# estimate.log_z(50,100,k=1)

' How to use AIS_rbm\n>> read the initial parameters documented inside AIS_rbm\n>> initialize AIS_rbm with the current rbm parameters whenever computing log_z is desired during training\n>>> then call the method log_z to estimate log_z\n    the method log_z takes three inputs\n1. num_samples is the number of samples (independent AIS runs). set it to 50\n2. anneal_steps is the number of steps used to anneal beta from 0 to 1. normally 1000 steps; 100 also works\n3. k is the number of Gibbs steps per annealing step. this is different from cd_k used when training the rbm. set it to 1 '