# Boltzmann Machines
<hr>

#### Dataset:- https://files.grouplens.org/datasets/movielens/ml-100k.zip, https://files.grouplens.org/datasets/movielens/ml-1m.zip

#### Dataset For Testing:- https://grouplens.org/datasets/movielens/

#### Reference Book:- [An Introduction to Restricted Boltzmann Machines](Reference/An%20Introduction%20to%20Restricted%20Boltzmann%20Machines.pdf)

<hr>

## Restricted Boltzmann Machines (RBMs)
<hr>

### Importing the Libraries

In [None]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.optim as optim
import torch.utils.data
from torch.autograd import Variable

<hr>

### Importing the dataset

In [None]:
# Load the MovieLens 1M movie table. Fields are separated by '::';
# header=None keeps the first line as data instead of column names.
movies = pd.read_csv(
    "ml-1m/movies.dat",
    sep="::",
    header=None,
    engine="python",
    encoding="latin-1",
)
movies

In [None]:
# Load the MovieLens 1M user table. Fields are separated by '::';
# header=None keeps the first line as data instead of column names.
users = pd.read_csv(
    "ml-1m/users.dat",
    sep="::",
    header=None,
    engine="python",
    encoding="latin-1",
)
users

In [None]:
# Load the MovieLens 1M ratings table. Fields are separated by '::';
# header=None keeps the first line as data instead of column names.
ratings = pd.read_csv(
    "ml-1m/ratings.dat",
    sep="::",
    header=None,
    engine="python",
    encoding="latin-1",
)
ratings

<hr>

### Preparing the training set and the test set

In [None]:
# Load the first MovieLens 100K training split (tab-separated).
# header=None is required: the file has no header row, so without it pandas
# would consume the first rating as column names and silently drop it.
training_set = pd.read_csv('ml-100k/u1.base', delimiter = '\t', header = None)
training_set

In [None]:
# Drop the DataFrame wrapper: the following cells index the ratings as a
# plain integer matrix (training_set[:, 0], training_set[:, 1], ...).
training_set = np.array(training_set, dtype=int)
training_set

In [None]:
# Load the matching MovieLens 100K test split (tab-separated).
# header=None is required: the file has no header row, so without it pandas
# would consume the first rating as column names and silently drop it.
test_set = pd.read_csv('ml-100k/u1.test', delimiter = '\t', header = None)
test_set

In [None]:
# Drop the DataFrame wrapper: the following cells index the ratings as a
# plain integer matrix (test_set[:, 0], test_set[:, 1], ...).
test_set = np.array(test_set, dtype=int)
test_set

<hr>

### Getting the number of users and movies

In [None]:
# Highest user id found in either split (column 0). convert() below treats
# ids as 1-based, so this value is also used as the number of users.
# ndarray.max() reduces in native code instead of iterating element by
# element through the Python builtin max().
nb_users = int(max(training_set[:, 0].max(), test_set[:, 0].max()))
nb_users

In [None]:
# Highest movie id found in either split (column 1). convert() below treats
# ids as 1-based, so this value is also used as the number of movies.
# ndarray.max() reduces in native code instead of iterating element by
# element through the Python builtin max().
nb_movies = int(max(training_set[:, 1].max(), test_set[:, 1].max()))
nb_movies

<hr>

### Converting the data into an array with users in rows and movies in columns

In [None]:
def convert(data, n_users=None, n_movies=None):
    """Build a dense user-by-movie rating matrix from a ratings table.

    Args:
        data: 2-D NumPy array whose first three columns are user id,
            movie id and rating. Ids are treated as 1-based.
        n_users: number of rows to produce (user ids 1..n_users).
            Defaults to the module-level ``nb_users``.
        n_movies: number of columns to produce (movie ids 1..n_movies).
            Defaults to the module-level ``nb_movies``.

    Returns:
        A list of ``n_users`` lists, each holding ``n_movies`` floats:
        the user's rating for every movie, with 0.0 where no rating exists.
    """
    # Fall back to the notebook-level sizes so existing convert(data) calls
    # keep working unchanged.
    n_users = nb_users if n_users is None else n_users
    n_movies = nb_movies if n_movies is None else n_movies

    user_col = data[:, 0]
    movie_col = data[:, 1]
    rating_col = data[:, 2]

    new_data = []
    for id_user in range(1, n_users + 1):
        # Build the row mask once per user and reuse it for both columns.
        rated = user_col == id_user
        ratings = np.zeros(n_movies)
        # Movie ids are 1-based, array positions are 0-based.
        ratings[movie_col[rated] - 1] = rating_col[rated]
        new_data.append(list(ratings))
    return new_data

In [None]:
# Replace each ratings table by its dense form: one list per user holding
# that user's rating for every movie (0 where the user gave no rating).
training_set = convert(training_set)
test_set = convert(test_set)

In [None]:
# Display the converted training data (a list of per-user rating lists).
training_set

In [None]:
# Display the converted test data (a list of per-user rating lists).
test_set

<hr>

### Converting the data into Torch tensors

In [None]:
# Turn the nested per-user rating lists into a 2-D float tensor
# (one row per user, one column per movie).
training_set = torch.FloatTensor(training_set)
training_set

In [None]:
# Turn the nested per-user rating lists into a 2-D float tensor
# (one row per user, one column per movie).
test_set = torch.FloatTensor(test_set)
test_set

<hr>

### Converting the ratings into binary ratings 1 (Liked) or 0 (Not Liked)

In [None]:
# Recode both rating tensors in place, applying the same rules to each:
#   0 (no rating)  -> -1
#   1 or 2 stars   ->  0 (not liked)
#   3 stars and up ->  1 (liked)
# The order matters: the "no rating" zeros must be moved to -1 before the
# low ratings are mapped onto 0.
for ratings_matrix in (training_set, test_set):
    ratings_matrix[ratings_matrix == 0] = -1
    ratings_matrix[ratings_matrix == 1] = 0
    ratings_matrix[ratings_matrix == 2] = 0
    ratings_matrix[ratings_matrix >= 3] = 1

<hr>

### Creating the Architecture of the Neural Network

In [None]:
class RBM:
    """Restricted Boltzmann Machine with binary units, updated by contrastive divergence.

    Attributes:
        W: weight matrix of shape (nh, nv).
        a: bias of the hidden nodes, shape (1, nh).
        b: bias of the visible nodes, shape (1, nv).
    """

    def __init__(self, nv, nh):
        """Draw every parameter from a standard normal distribution.

        nv -> number of visible nodes, nh -> number of hidden nodes.
        """
        self.W = torch.randn(nh, nv)  # weights
        self.a = torch.randn(1, nh)   # hidden-node bias
        self.b = torch.randn(1, nv)   # visible-node bias

    def sample_h(self, x):
        """Sample the hidden nodes given a batch of visible vectors.

        x -> 2-D tensor with one visible vector per row (nv columns).
        Returns (p_h_given_v, h): the activation probability of every hidden
        node and a Bernoulli sample drawn from those probabilities.
        """
        pre_activation = torch.mm(x, self.W.t())
        pre_activation = pre_activation + self.a.expand_as(pre_activation)
        prob_hidden = torch.sigmoid(pre_activation)
        hidden_sample = torch.bernoulli(prob_hidden)
        return prob_hidden, hidden_sample

    def sample_v(self, y):
        """Sample the visible nodes given a batch of hidden vectors.

        y -> 2-D tensor with one hidden vector per row (nh columns).
        Returns (p_v_given_h, v): the activation probability of every visible
        node and a Bernoulli sample drawn from those probabilities.
        """
        pre_activation = torch.mm(y, self.W)
        pre_activation = pre_activation + self.b.expand_as(pre_activation)
        prob_visible = torch.sigmoid(pre_activation)
        visible_sample = torch.bernoulli(prob_visible)
        return prob_visible, visible_sample

    def train(self, v0, vk, ph0, phk):
        """Apply one contrastive-divergence update to W, a and b in place.

        v0  -> input batch of visible vectors (one row per user)
        vk  -> visible nodes obtained after "k" sampling steps
        ph0 -> probabilities that the hidden nodes equal 1 given "v0"
        phk -> probabilities that the hidden nodes equal 1 given "vk"
        """
        # Statistics gathered from the data minus those from the k-step sample.
        positive_phase = torch.mm(v0.t(), ph0)
        negative_phase = torch.mm(vk.t(), phk)
        self.W += (positive_phase - negative_phase).t()

        # Bias updates are the differences summed over the batch dimension.
        self.b += torch.sum(v0 - vk, 0)
        self.a += torch.sum(ph0 - phk, 0)

<hr>

### Creating the object of RBM Class

In [None]:
# Model dimensions: one visible node per movie column of the training
# tensor and 100 hidden nodes; users are processed in batches of 100.
nh = 100
batch_size = 100
nv = training_set.shape[1]

rbm = RBM(nv, nh)

<hr>

### Training the RBM

In [None]:
# Train the RBM with 10-step contrastive divergence over mini-batches of users.

# No. of epochs
nb_epochs = 10

for epoch in range(1, nb_epochs + 1):
    train_loss = 0
    s = 0.  # Counter: number of batches processed in this epoch

    # Walk over *all* users. The upper bound is nb_users rather than
    # nb_users - batch_size, which never reached the trailing users (a whole
    # batch when nb_users is a multiple of batch_size). Slicing clamps the
    # last batch, so it may simply be shorter than batch_size.
    for id_user in range(0, nb_users, batch_size):
        v0 = training_set[id_user:id_user + batch_size]
        # Start of the Gibbs chain. vk is rebound to a freshly sampled tensor
        # before anything is written to it, so training_set is not modified.
        vk = v0
        ph0, _ = rbm.sample_h(v0)

        for _step in range(10):
            _, hk = rbm.sample_h(vk) # hk -> "h" hidden nodes obtained at the "k"th step of contrastive divergence
            _, vk = rbm.sample_v(hk)
            # Keep the unrated entries (-1) frozen so they do not take part in learning.
            vk[v0 < 0] = v0[v0 < 0]

        phk, _ = rbm.sample_h(vk)
        rbm.train(v0, vk, ph0, phk)
        # Mean absolute difference between input and reconstruction, rated entries only.
        train_loss += torch.mean(torch.abs(v0[v0 >= 0] - vk[v0 >= 0]))
        s += 1.

    print("epoch: " + str(epoch) + " loss: " + str(train_loss/s))

<hr>

### Testing the RBM

In [None]:
# Evaluate on the test split: for every user with at least one test rating,
# reconstruct the visible layer from that user's training ratings with one
# hidden/visible sampling step and compare it with the held-out ratings.
test_loss = 0
s = 0.  # number of users that contributed to the loss
for id_user in range(nb_users):
    v = training_set[id_user:id_user+1]
    vt = test_set[id_user:id_user+1]
    rated = vt >= 0  # entries that carry a real test rating
    if not rated.any():
        continue
    _, h = rbm.sample_h(v)
    _, v = rbm.sample_v(h)
    test_loss += torch.mean(torch.abs(vt[rated] - v[rated])) # Average Distance here
    s += 1.
print('test loss: '+str(test_loss/s))

<hr>

### Evaluating The Boltzmann Machine

Hi guys,
<br>&emsp;the two ways of evaluating our RBM are with the *RMSE* and the *Average Distance*.

#### RMSE:

* The RMSE (Root Mean Squared Error) is calculated as the root of the mean of the squared differences between the predictions and the targets.

Here is the code that computes the RMSE:

**Training phase:**

        nb_epoch = 10
        for epoch in range(1, nb_epoch + 1):
            train_loss = 0
            s = 0.
            for id_user in range(0, nb_users - batch_size, batch_size):
                vk = training_set[id_user:id_user+batch_size]
                v0 = training_set[id_user:id_user+batch_size]
                ph0,_ = rbm.sample_h(v0)
                for k in range(10):
                    _,hk = rbm.sample_h(vk)
                    _,vk = rbm.sample_v(hk)
                    vk[v0<0] = v0[v0<0]
                phk,_ = rbm.sample_h(vk)
                rbm.train(v0, vk, ph0, phk)
                train_loss += np.sqrt(torch.mean((v0[v0>=0] - vk[v0>=0])**2)) # RMSE here
                s += 1.
            print('epoch: '+str(epoch)+' loss: '+str(train_loss/s))

**Test phase:**

        test_loss = 0
        s = 0.
        for id_user in range(nb_users):
            v = training_set[id_user:id_user+1]
            vt = test_set[id_user:id_user+1]
            if len(vt[vt>=0]) > 0:
                _,h = rbm.sample_h(v)
                _,v = rbm.sample_v(h)
                test_loss += np.sqrt(torch.mean((vt[vt>=0] - v[vt>=0])**2)) # RMSE here
                s += 1.
        print('test loss: '+str(test_loss/s))

* Using the RMSE, our RBM would obtain an error of around `0.46`. But be careful: although they look similar, one must not confuse the RMSE with the Average Distance. An RMSE of `0.46` doesn’t mean that the average distance between the prediction and the ground truth is `0.46`. With purely random predictions we would end up with an RMSE of around `0.72`. An error of `0.46` corresponds to about `75%` successful predictions.

#### Average Distance:

* If you prefer to play with the Average Distance, I understand, it’s more intuitive. And that’s what we used in the practical tutorials to evaluate our RBM model:

**Training phase:**

        nb_epoch = 10
        for epoch in range(1, nb_epoch + 1):
            train_loss = 0
            s = 0.
            for id_user in range(0, nb_users - batch_size, batch_size):
                vk = training_set[id_user:id_user+batch_size]
                v0 = training_set[id_user:id_user+batch_size]
                ph0,_ = rbm.sample_h(v0)
                for k in range(10):
                    _,hk = rbm.sample_h(vk)
                    _,vk = rbm.sample_v(hk)
                    vk[v0<0] = v0[v0<0]
                phk,_ = rbm.sample_h(vk)
                rbm.train(v0, vk, ph0, phk)
                train_loss += torch.mean(torch.abs(v0[v0>=0] - vk[v0>=0])) # Average Distance here
                s += 1.
            print('epoch: '+str(epoch)+' loss: '+str(train_loss/s))

**Test phase:**

        test_loss = 0
        s = 0.
        for id_user in range(nb_users):
            v = training_set[id_user:id_user+1]
            vt = test_set[id_user:id_user+1]
            if len(vt[vt>=0]) > 0:
                _,h = rbm.sample_h(v)
                _,v = rbm.sample_v(h)
                test_loss += torch.mean(torch.abs(vt[vt>=0] - v[vt>=0])) # Average Distance here
                s += 1.
        print('test loss: '+str(test_loss/s))

* With this metric, we obtained an Average Distance of `0.24`, which is equivalent to about `75%` of correct prediction.

* Hence, it works very well and there is a predictive power.

* If you want to check that `0.25` corresponds to `75%` of success, you can run the following test:

        import numpy as np
        u = np.random.choice([0,1], 100000)
        v = np.random.choice([0,1], 100000)
        u[:50000] = v[:50000]
        sum(u==v)/float(len(u)) # -> you get 0.75
        np.mean(np.abs(u-v)) # -> you get 0.25
        # so 0.25 corresponds to 75% of success.

*Enjoy Deep Learning!*

In [None]:
import numpy as np

# Sanity check: an average distance of about 0.25 matches about 75% agreement.
n_samples = 100000
half = n_samples // 2

# Two independent random binary vectors ...
u = np.random.choice([0, 1], n_samples)
v = np.random.choice([0, 1], n_samples)

# ... forced to agree on the first half, so only the second half can differ.
u[:half] = v[:half]

agreement = (u == v).sum() / float(u.size)
print(agreement)  # -> approx. 0.75

mean_distance = np.abs(u - v).mean()
print(mean_distance)  # -> approx. 0.25