# Restricted Boltzmann Machine

**1. Importing the Libraries**

In [1]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
# for parallel computations
import torch.nn.parallel
# for optimizer
import torch.optim as optim
# for tools that we use
import torch.utils.data
# for stochastic gradient descent
from torch.autograd import Variable

**2.1. Importing the 'Movies' dataset**

In [2]:
# We won't be using this dataset.
movies = pd.read_csv('ml-1m/movies.dat', sep = '::', header = None, engine = 'python', encoding = 'latin-1')
movies.head(5)

Unnamed: 0,0,1,2
0,1,Toy Story (1995),Animation|Children's|Comedy
1,2,Jumanji (1995),Adventure|Children's|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama
4,5,Father of the Bride Part II (1995),Comedy


**2.2. Importing the 'Users' dataset**

In [3]:
# We won't be using this dataset.
users = pd.read_csv('ml-1m/users.dat', sep = '::', header = None, engine = 'python', encoding = 'latin-1')
users.head(5)

Unnamed: 0,0,1,2,3,4
0,1,F,1,10,48067
1,2,M,56,16,70072
2,3,M,25,15,55117
3,4,M,45,7,2460
4,5,M,25,20,55455


**2.3. Importing the 'Ratings' dataset**

In [4]:
# We won't be using this dataset.
ratings = pd.read_csv('ml-1m/ratings.dat', sep = '::', header = None, engine = 'python', encoding = 'latin-1')
ratings.head(5)

Unnamed: 0,0,1,2,3
0,1,1193,5,978300760
1,1,661,3,978302109
2,1,914,3,978301968
3,1,3408,4,978300275
4,1,2355,5,978824291


**3. Preparing the training set and the test set**

In [5]:
training_set = pd.read_csv('ml-100k/u1.base', delimiter = '\t')
training_set = np.array(training_set, dtype = 'int')
training_set.shape

(79999, 4)

In [6]:
pd.DataFrame(training_set).head(5)

Unnamed: 0,0,1,2,3
0,1,2,3,876893171
1,1,3,4,878542960
2,1,4,3,876893119
3,1,5,3,889751712
4,1,7,4,875071561


In [7]:
test_set = pd.read_csv('ml-100k/u1.test', delimiter = '\t')
test_set = np.array(test_set, dtype = 'int')
test_set.shape

(19999, 4)

In [8]:
pd.DataFrame(test_set).head(5)

Unnamed: 0,0,1,2,3
0,1,10,3,875693118
1,1,12,5,878542960
2,1,14,5,874965706
3,1,17,3,875073198
4,1,20,4,887431883


**4. Getting the number of users and movies**

In [9]:
number_of_users = int(max(max(training_set[:, 0]), max(test_set[:, 0])))
number_of_movies = int(max(max(training_set[:, 1]), max(test_set[:, 1])))

**5. Coverting the data into a** m(number of the users), n(number of the movies) **matrix**

In [10]:
# because we are going to use Torch library, we won't use numpy 2D array. We'll use list of list.
# one list for each user and the length of the lists are equal to number of all movies.
def convert(data):
    matrix = []
    for user_id in range(1, number_of_users + 1):
        movies_id = data[:, 1] [data[:, 0] == user_id]
        ratings = data[:, 2] [data[:, 0] == user_id]
        user_ratings = np.zeros(number_of_movies)
        user_ratings[movies_id - 1] = ratings
        matrix.append(list(user_ratings))
    return matrix

In [11]:
training_set = convert(training_set)
test_set = convert(test_set)

In [12]:
pd.DataFrame(training_set).head(5)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,1672,1673,1674,1675,1676,1677,1678,1679,1680,1681
0,0.0,3.0,4.0,3.0,3.0,0.0,4.0,1.0,5.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


**6. Covert data to Torch Tensor**<br> for better efficiency, we're going to convert numpy array to pytorch array (tensor)

In [13]:
training_set = torch.FloatTensor(training_set)
type(training_set)

torch.Tensor

In [14]:
test_set = torch.FloatTensor(test_set)
test_set

tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]])

**7. Covert users ratings into binary ratings: liked or disliked category**

Training Set:

In [15]:
# convert all the zeros to -1
training_set[training_set == 0] = -1
# convert 1 and 2 to 0
training_set[training_set == 1] = 0
training_set[training_set == 2] = 0
# convert all the ratings that are more than 3 to 1
training_set[training_set >= 3] = 1

Test Set:

In [16]:
# convert all the zeros to -1
test_set[test_set == 0] = -1
# convert 1 and 2 to 0
test_set[test_set == 1] = 0
test_set[test_set == 2] = 0
# convert all the ratings that are more than 3 to 1
test_set[test_set >= 3] = 1

**7. Create Neural Network Architecture**

**Contrastive Divergence Algorithm**
![Contrastive Divergence Algorithm](https://cdn-images-1.medium.com/max/1600/1*cPYfytQ30HP-2rpe_NKqmg.png)

In [17]:
class RBM:
    
    def __init__(self, number_of_visible_nodes, number_of_hidden_nodes):
        # Normal distribution: mean = 0 & variance = 1     
        self.Weight = torch.randn(number_of_hidden_nodes, number_of_visible_nodes)
        # Bias for probability of the hidden nodes given the visible nodes
        self.Bias_v_to_h = torch.randn(1, number_of_hidden_nodes)
        # Bias for probability of the visible nodes given the hidden nodes
        self.Bias_h_to_v = torch.randn(1, number_of_visible_nodes)
        
    # Sampling the hidden nodes according to the probability of hidden nodes given the visible nodes
    def sample_hidden(self, vector_v):
        # Matrix multiplication 
        weight_vector_v = torch.mm(vector_v, self.Weight.t())
        # Compute the probability of the hidden nodes given by visible nodes (Sigmoid Activation Function)
        activation = weight_vector_v + self.Bias_v_to_h.expand_as(weight_vector_v)
        probability = torch.sigmoid(activation)
        # We're goning to make bernoulli RBM, because we are just predicting a binary outcome 
        return probability, torch.bernoulli(probability)


    # Sampling the hidden nodes according to the probability of visible nodes given the hidden nodes
    def sample_visible(self, vector_h):
        # Matrix multiplication 
        # We don't use transpose because we are computing visible nodes (movies in ratings matirx)
        weight_vector_h = torch.mm(vector_h, self.Weight)
        # Compute the probability of the visible nodes given by hidden nodes (Sigmoid Activation Function)
        activation = weight_vector_h + self.Bias_h_to_v.expand_as(weight_vector_h)
        probability = torch.sigmoid(activation)
        # We're goning to make bernoulli RBM, because we are just predicting a binary outcome 
        return probability, torch.bernoulli(probability)

    
    # update weight and bias
    def train(self, v0, vk, ph0, phk):
        self.Weight += (torch.mm(v0.t(), ph0) - torch.mm(vk.t(), phk)).t()
#         self.W += (torch.mm(v0.t(), ph0) - torch.mm(vk.t(), phk)).t()
        self.Bias_h_to_v += torch.sum((v0 - vk), 0)
        self.Bias_v_to_h += torch.sum((ph0 - phk), 0)

In [18]:
# number of the movies
number_of_visible_nodes = training_set.shape[1]
# number of the features
number_of_hidden_nodes = 100
batch_size = 100
rbm = RBM(number_of_visible_nodes, number_of_hidden_nodes)

**8. Training the RBM**

In [19]:
epochs = 10
# k steps of contrastive divergence
k = 10
for epoch in range(epochs):
    train_loss = 0
    # Normalize the train loss with counter     
    counter = 0.
    for user_id in range(0, number_of_users - batch_size, batch_size):
        vk = training_set[user_id : user_id + batch_size]
        # v0 is the target, so we don't change it        
        v0 = training_set[user_id : user_id + batch_size]
        ph0, _ = rbm.sample_hidden(v0)
        # Gibbs Sampling   
        for i in range(k):
            _, hk = rbm.sample_hidden(vk)
            _, vk = rbm.sample_visible(hk)
            # We don't want to learn where there is no rating.
            vk[v0 < 0] = v0[v0 < 0] 
        phk, _ = rbm.sample_hidden(vk)
        rbm.train(v0, vk, ph0, phk)
        train_loss += torch.mean(torch.abs(v0[v0 >= 0] - vk[v0 >= 0]))
        counter += 1.
    print('epoch: {}, loss: {}'.format(str(epoch), str(train_loss/counter)))

epoch: 0, loss: tensor(0.3313)
epoch: 1, loss: tensor(0.2464)
epoch: 2, loss: tensor(0.2494)
epoch: 3, loss: tensor(0.2513)
epoch: 4, loss: tensor(0.2522)
epoch: 5, loss: tensor(0.2477)
epoch: 6, loss: tensor(0.2462)
epoch: 7, loss: tensor(0.2449)
epoch: 8, loss: tensor(0.2489)
epoch: 9, loss: tensor(0.2461)


**9. Testing the RBM**

In [20]:
test_loss = 0
counter = 0.
for user_id in range(number_of_users):
    v = training_set[user_id : user_id + 1]
    # vt is the target, so we don't change it 
    vt = test_set[user_id : user_id + 1]
    if len(vt[vt >= 0]) > 0:
        # one step
        _, h = rbm.sample_hidden(v)
        _, v = rbm.sample_visible(h)
        test_loss += torch.mean(torch.abs(vt[vt >= 0] - v[vt >= 0]))
        counter += 1.
print('test loss: ' + str(test_loss/counter))

test loss: tensor(0.2503)


**9.1.  Evaluating RBM with the RMSE**

In [21]:
# Training Set
epochs = 10
k = 10
for epoch in range(1, epochs + 1):
    train_loss = 0
    counter = 0.
    for user_id in range(0, number_of_users - batch_size, batch_size):
        vk = training_set[user_id : user_id + batch_size]
        v0 = training_set[user_id : user_id + batch_size]
        ph0,_ = rbm.sample_hidden(v0)
        for i in range(k):
            _, hk = rbm.sample_hidden(vk)
            _, vk = rbm.sample_visible(hk)
            vk[v0 < 0] = v0[v0 < 0] 
        phk, _ = rbm.sample_hidden(vk)
        rbm.train(v0, vk, ph0, phk)
        # RMSE
        train_loss += np.sqrt(torch.mean((v0[v0 >= 0] - vk[v0 >= 0]) ** 2))
        counter += 1.
    print('epoch: {}, loss: {}'.format(str(epoch), str(train_loss/counter)))

epoch: 1, loss: tensor(0.4983)
epoch: 2, loss: tensor(0.4982)
epoch: 3, loss: tensor(0.4942)
epoch: 4, loss: tensor(0.5019)
epoch: 5, loss: tensor(0.4928)
epoch: 6, loss: tensor(0.4982)
epoch: 7, loss: tensor(0.4954)
epoch: 8, loss: tensor(0.4987)
epoch: 9, loss: tensor(0.4945)
epoch: 10, loss: tensor(0.4970)


In [22]:
# Test Set
test_loss = 0
counter = 0.
for user_id in range(number_of_users):
    v = training_set[user_id : user_id + 1]
    vt = test_set[user_id : user_id + 1]
    if len(vt[vt >= 0]) > 0:
        _,h = rbm.sample_hidden(v)
        _,v = rbm.sample_visible(h)
        # RMSE
        test_loss += np.sqrt(torch.mean((vt[vt>=0] - v[vt >= 0]) ** 2)) 
        counter += 1.
print('test loss: ' + str(test_loss/counter))

test loss: tensor(0.4710)
