In [2]:
from utils import *

# Restricted Boltzmann machines

$\textbf{v,h}\in \{0,1\}^{m+n}$
  
&nbsp; $m$ : number of visible nodes  
&nbsp; $n$ : number of hidden nodes  
$\textbf{v}$ : visible nodes $(v_1,v_2,\cdots,v_m)^T$ ($m \times 1 $ matrix)  
$\textbf{h}$ : hidden nodes $(h_1,h_2,\cdots,h_n)^T$  ($n \times 1 $ matrix)
    
$w_{i,j}, b_j, c_i \in \mathbb{R}$   
$W : n \times m $ matrix  
$b : m \times 1 $ matrix  
$c : n \times 1 $ matrix    
$$E(\textbf{v},\textbf{h})= -\sum_{i=1}^{n}\sum_{j=1}^{m}w_{ij}h_iv_j-\sum_{i=1}^nc_ih_i-\sum_{j=1}^{m}b_jv_j$$
$$
= -\mathbf{h}^TW\mathbf{v}-c^T\mathbf{h}-b^T\mathbf{v}
$$


$$p(\mathbf{v,h}) = \frac{e^{-E(\mathbf{v,h})}}{\sum_{\mathbf{v,h}}e^{-E(\mathbf{v,h})}}$$

# Likelihood
$$\mathcal{L}(\theta|v)=p(\mathbf{v}|\theta)
=\frac{\sum_{\mathbf{h}}e^{-E(\mathbf{v,h})}}{\sum_{\mathbf{v,h}}e^{-E(\mathbf{v,h})}}$$

$$\ln\mathcal{L}(\theta|v)=\ln p(\mathbf{v}|\theta)
=\ln{\sum_{\mathbf{h}}e^{-E(\mathbf{v,h})}}
-\ln{\sum_{\mathbf{v,h}}e^{-E(\mathbf{v,h})}}$$

$$\frac{\partial\ln\mathcal{L}(\theta|v)}{\partial \theta}
=\frac{\partial}{\partial \theta} \Big{(} \ln{\sum_{\mathbf{h}}e^{-E(\mathbf{v,h})}} \Big{)}
-\frac{\partial}{\partial \theta} \Big{(} \ln{\sum_{\mathbf{v,h}}e^{-E(\mathbf{v,h})}} \Big{)}$$

$$=-\frac{1}{\sum_{\mathbf{h}}e^{-E(\mathbf{v,h})}}
\sum_{\mathbf{h}}e^{-E(\mathbf{v,h})}\frac{\partial E(\mathbf{v,h})}{\partial \theta}
+\frac{1}{\sum_{\mathbf{v,h}}e^{-E(\mathbf{v,h})}}
\sum_{\mathbf{v,h}}e^{-E(\mathbf{v,h})}\frac{\partial E(\mathbf{v,h})}{\partial \theta}
$$

$$=-\sum_{\mathbf{h}}p(\mathbf{h}|\mathbf{v})\frac{\partial E(\mathbf{v,h})}{\partial \theta}
+
\sum_{\mathbf{v,h}}p(\mathbf{v},\mathbf{h})\frac{\partial E(\mathbf{v,h})}{\partial \theta}
$$
### note
$$p(\mathbf{h}|\mathbf{v})=\frac{p(\mathbf{v,h})}{p(\mathbf{v})}
=\frac{\frac{1}{Z}e^{-E(\mathbf{v,h})}}{\frac{1}{Z}\sum_{\mathbf{h}}e^{-E(\mathbf{v,h})}}
\quad (Z \textsf{ is the normalizing constant})$$

$$p(\mathbf{h}|\mathbf{v})=\prod_{i=1}^n p(h_i|\mathbf{v})$$

$$p(H_i =1 | \mathbf{v}) = sigmoid(\sum_{j=1}^m w_{ij}v_j + c_i)$$
$$p(V_j =1 | \mathbf{h}) = sigmoid(\sum_{i=1}^n w_{ij}h_i + b_j)$$ 

### Gradient of Likelihood
$\theta \in \{w_{ij},b_{j},c_{i}\}$
$$\frac{\partial\ln\mathcal{L}(\theta|v)}{\partial w_{ij}}=-\sum_{\mathbf{h}}p(\mathbf{h}|\mathbf{v})\frac{\partial E(\mathbf{v,h})}{\partial w_{ij}}
+
\sum_{\mathbf{v,h}}p(\mathbf{v},\mathbf{h})\frac{\partial E(\mathbf{v,h})}{\partial w_{ij}}
$$

$$=\sum_{\mathbf{h}}p(\mathbf{h}|\mathbf{v})h_iv_j
-\sum_{\mathbf{v,h}}p(\mathbf{v},\mathbf{h})h_iv_j
$$

In [5]:
class RBM:
    '''
        Restricted Boltzmann machine parameters and energy function
        (TensorFlow 1.x graph/session style).

        attributes
            visible_node : number of visible nodes (m)
            hidden_node  : number of hidden nodes (n)
            k            : stored as given; not used by the code in this class
                           (presumably the number of sampling steps - TODO confirm)
            W            : weight variable, shape [n, m]
            b            : visible bias variable, shape [m]
            c            : hidden bias variable, shape [n]
            visible      : float32 placeholder, shape [None, m]
            hidden       : float32 placeholder, shape [None, n]
            sess         : tf.Session created and owned by this instance
    '''
    def __init__(self, m=10, n=5, k = 100):
        '''
            m : number of visible nodes
            n : number of hidden nodes
            k : kept on the instance as self.k (not used in this class)
        '''
        self.visible_node = m
        self.hidden_node = n
        self.k = k

        # Initial values come from the utils helper initialize_variable(shape, Type='uniform').
        self.W = tf.Variable(initialize_variable([self.hidden_node, self.visible_node], Type = 'uniform'), name = 'weights')
        self.b = tf.Variable(initialize_variable([self.visible_node], Type = 'uniform'), name = 'visible_biases')
        self.c = tf.Variable(initialize_variable([self.hidden_node], Type = 'uniform'), name = 'hidden_biases')

        #Should be binary
        self.visible = tf.placeholder(tf.float32, [None, self.visible_node])
        self.hidden = tf.placeholder(tf.float32, [None, self.hidden_node])

        # Each instance opens its own session; release it with close() when done.
        self.sess = tf.Session()
        self.sess.run(tf.global_variables_initializer())

        # Debug dump of the freshly initialised parameters.
        print_tensor(self.sess, self.W)
        print_tensor(self.sess, self.b)
        print_tensor(self.sess, self.c)

    def close(self):
        '''
            Close the TensorFlow session owned by this instance.
        '''
        self.sess.close()

    def get_free_energy(self, v, h):
        '''
            Build the joint energy of one (v, h) configuration:

                E(v, h) = - h^T W v - b^T v - c^T h

            Note: despite the method name, this is the energy E(v, h) of a
            single configuration, not the free energy of v (h is not summed out).

            input
                v : 1D tensor of size m (visible state)
                h : 1D tensor of size n (hidden state)
            return
                tensor of shape [1] holding E(v, h)
            raise
                ValueError if v or h is not rank 1, or if their sizes do not
                match the shape of W
        '''
        if h.get_shape().ndims!=1 or v.get_shape().ndims!=1:
            raise ValueError("Dimension should be 1 but dimension h : {} and v : {}"
                                .format(h.get_shape().ndims, v.get_shape().ndims))

        if h.get_shape()[0]!=self.W.get_shape()[0] or v.get_shape()[0]!=self.W.get_shape()[1]:
            raise ValueError("Size does not match the variables: h : {}, v : {}, W : {}"
                                .format(h.get_shape(), v.get_shape(), self.W.get_shape()))

        # Row/column views so every term is a plain matrix product.
        h_row = tf.reshape(h, [1, -1])   # 1 x n
        h_col = tf.reshape(h, [-1, 1])   # n x 1
        v_col = tf.reshape(v, [-1, 1])   # m x 1

        interaction = tf.reshape(tf.matmul(tf.matmul(h_row, self.W), v_col), [1])        # h^T W v
        visible_term = tf.reshape(tf.matmul(tf.reshape(self.b, [1, -1]), v_col), [1])    # b^T v
        hidden_term = tf.reshape(tf.matmul(tf.reshape(self.c, [1, -1]), h_col), [1])     # c^T h

        energy = -interaction - visible_term - hidden_term
        # Print the very tensor that is returned; rebuilding the expression here
        # would add a second, redundant set of ops to the graph on every call.
        print_tensor(self.sess, energy)
        return energy


In [6]:
# Small check of the energy function: 3 visible nodes, 2 hidden nodes.
r = RBM(m=3, n=2)
# a is the visible state v, b is the hidden state h (binary values stored as float32).
a = tf.constant([1, 0, 0], dtype=tf.float32)
b = tf.constant([1, 0], dtype=tf.float32)
# Last expression of the cell, so the returned tensor is shown as the cell output.
r.get_free_energy(v=a, h=b)

weights_1:0
(2, 3)
[[ 0.49036193 -0.56113029 -0.0844965 ]
 [ 0.18333745  0.67606497 -0.73813701]]
visible_biases_1:0
(3,)
[ 0.84280586 -0.60020018  0.71641779]
hidden_biases_1:0
(2,)
[ 0.60457277 -0.66297674]
sub_7:0
(1,)
[-1.93774056]


<tf.Tensor 'sub_5:0' shape=(1,) dtype=float32>