# Ristricted Boltzmann Machines

In [1]:
# Importing the libraries
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.optim as optim
import torch.utils.data
from torch.autograd import Variable

In [2]:
# Importing the dataset
movies = pd.read_csv('ml-1m/movies.dat',sep = '::', header=None, engine='python', encoding='latin-1')
users = pd.read_csv('ml-1m/users.dat',sep = '::', header=None, engine='python', encoding='latin-1')
ratings = pd.read_csv('ml-1m/ratings.dat',sep = '::', header=None, engine='python', encoding='latin-1')

In [3]:
movies.head()

Unnamed: 0,0,1,2
0,1,Toy Story (1995),Animation|Children's|Comedy
1,2,Jumanji (1995),Adventure|Children's|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama
4,5,Father of the Bride Part II (1995),Comedy


In [4]:
users.head()

Unnamed: 0,0,1,2,3,4
0,1,F,1,10,48067
1,2,M,56,16,70072
2,3,M,25,15,55117
3,4,M,45,7,2460
4,5,M,25,20,55455


In [5]:
ratings.head()

Unnamed: 0,0,1,2,3
0,1,1193,5,978300760
1,1,661,3,978302109
2,1,914,3,978301968
3,1,3408,4,978300275
4,1,2355,5,978824291


In [6]:
# Preparing the training set and the test set
training_set = pd.read_csv('ml-100k/u1.base',delimiter='\t')
test_set = pd.read_csv('ml-100k/u1.test',delimiter='\t')
type(training_set)

pandas.core.frame.DataFrame

In [7]:
training_set.head()

Unnamed: 0,1,1.1,5,874965758
0,1,2,3,876893171
1,1,3,4,878542960
2,1,4,3,876893119
3,1,5,3,889751712
4,1,7,4,875071561


In [8]:
test_set.head()

Unnamed: 0,1,6,5,887431973
0,1,10,3,875693118
1,1,12,5,878542960
2,1,14,5,874965706
3,1,17,3,875073198
4,1,20,4,887431883


In [9]:
training_set = np.array(training_set,dtype='int')
test_set = np.array(test_set,dtype='int')

In [10]:
# Getting the number of users and movies
nb_users = int(max(max(training_set[:,0]),max(test_set[:,0])))
nb_movies = int(max(max(training_set[:,1]),max(test_set[:,1])))
print(nb_users)
print(nb_movies)

943
1682


In [11]:
# Converting the ratings into binary ratings 1 (Liked) or 0 (Not Liked) in training set
training_set[training_set == 0] = -1
training_set[training_set == 1] = 0
training_set[training_set == 2] = 0
training_set[training_set == 3] = 1
training_set[training_set == 4] = 1
training_set[training_set == 5] = 1

In [12]:
# Converting the ratings into binary ratings 1 (Liked) or 0 (Not Liked) in test set
test_set[test_set == 0] = -1
test_set[test_set == 1] = 0
test_set[test_set == 2] = 0
test_set[test_set == 3] = 1
test_set[test_set == 4] = 1
test_set[test_set == 5] = 1

In [13]:
# Converting the data into an array with users in lines and movies in columns
def convert(data):
    new_data = []
    for id_users in range(1, nb_users + 1):
        id_movies = data[:,1][data[:,0] == id_users]
        id_ratings = data[:,2][data[:,0] == id_users]
        ratings = np.zeros(nb_movies)
        ratings[id_movies - 1] = id_ratings
        new_data.append(list(ratings))
    return new_data    

In [14]:
# Converting the data into Torch tensors
training_set = convert(training_set)
test_set = convert(test_set)

In [15]:
training_set = torch.FloatTensor(training_set)
test_set = torch.FloatTensor(test_set)

In [16]:
# Create the class RBM 
class RBM():
    # self is the object 
    # all the variables attached to the object will be created with self. 
    # nv number of visible nodes 
    # nh number of hidden nodes 
    def __init__(self, nv, nh):
        # initialize the parameters we optimize during the training weights and bias
        # weights used for the probability of the visible nodes given the hidden nodes (p_v_given_h))
        # torch.rand : random normal distribution mean=0, variance=1 
        self.W = torch.randn(nh, nv)
        # bias probability of the hidden nodes given the visible nodes (p_h_given_v))
        # fake dimension for the batch = 1
        self.a = torch.randn(1, nh)
        # bias probability of the visible nodes is activated 
        # given the value of the hidden nodes (p_v_given_h))
        self.b = torch.randn(1, nv)
        
    def sample_h(self, x):
        # probability h is activated given the value v is the sigmoid(Wx+a).
        # torch.mm make the product of two tensors. 
        # W.t()take the transpose because W is used for the p_v_given_h.
        wx = torch.mm(x, self.W.t())
        # .expand_as(wx) : expand the mini-batch.
        activation = wx + self.a.expand_as(wx)
        # probability p_h_given_v is the probability that the note drama genre is activated. 
        # v value is the input value. If v is a film drama, p_h_given_v will be hight. 
        # If v is not a film drama, p_h_given_v will be low.
        p_h_geven_v = torch.sigmoid(activation)
        # Bernouilli RBM. we predict the user loves the movie or not (0 or 1).
        # activation or not activation of the nh neurons.
        return p_h_geven_v, torch.bernoulli(p_h_geven_v)
    
    def sample_v(self, y):
        # probability h is activated given the value v is the sigmoid(Wx+a).
        # torch.mm make the product of two tensors.
        wy = torch.mm(y, self.W)
        # .expand_as(wx) : expand the mini-batch.
        activation = wy + self.b.expand_as(wy)
        p_v_geven_h = torch.sigmoid(activation)
        # Bernouilli RBM. we predict the user loves the movie or not (0 or 1).
        # activation or not activation of the nv neurons.
        return p_v_geven_h, torch.bernoulli(p_v_geven_h)
    # Contrastive divergence Algorithm
    # Optimize the weights to minimize the energy.
    # ~ Maximize the Log-Likelihood of the model. 
    # Need to approximate the gradients with the algorithm contrastive divergence.
    def train(self, v0, vk, ph0, phk):
        self.W += (torch.mm(v0.t(), ph0) - torch.mm(vk.t(), phk)).t()
        # add ,0 for the tensor of two dimension 
        self.b += torch.sum((v0 - vk), 0)
        self.a += torch.sum((ph0 - phk), 0)

In [17]:
### Part 2 : Create the RBM Object 
# number of movies 
nv = len(training_set[0])
# parameter is tunable is the number of features that we want to detect 
# features ~ genre, actors, director, oscar, date.... 
nh = 100
# update the weights after serveral observations, also tunable
batch_size = 100
# Creation of the object of the class RBM()
rbm = RBM(nv,nh)
## Part 3 : Training the RBM 
nb_epoch = 10
# upper bound is no included nb_epoch+1 

# First for loop : epoch for loop 
for epoch in range(1,nb_epoch+1):
    #loss function initialized to 0 at the beginning of the trainning 
    train_loss = 0
    # counter which is a float . 
    s = 0.
    # Second for loop : user forloop 
    # 0 lower bound 
    # nb_users-batch_size upper bound 
    # batch_size is the step of each batch (100)
    # First batch is from user id=0 ti user id =99
    for id_user in range(0, nb_users-batch_size, batch_size):
        # at the beginning v0=vk 
        # vk is going to be updated 
        # id_user,id_user+batch_size ~id_user+100
        vk = training_set[id_user:id_user+batch_size]
        v0 = training_set[id_user:id_user+batch_size]
        ph0, _ = rbm.sample_h(v0)
        # Third for loop : Contrastive divergence
        for k in range(10):
            _, hk = rbm.sample_h(vk)
            _, vk = rbm.sample_v(hk)
            # we don't want to learn where is no rating by the user
            # no update when -1 rating. 
            vk[v0<0] = v0[v0<0]
        phk, _ = rbm.sample_h(vk)
        rbm.train(v0, vk, ph0, phk)
        # Compare vk updated after the training to v0 the target. 
        # simple distance in absolute value 
        # [vO>=0] take only the value with ratings / coherence with vk[v0<0]=[v0<0]
        train_loss += torch.mean(torch.abs(v0[v0>=0] - vk[v0>=0]))
        s += 1.
    print('epoch : '+str(epoch)+' loss : '+str(train_loss/s))    

epoch : 1 loss : tensor(0.1253)
epoch : 2 loss : tensor(0.0647)
epoch : 3 loss : tensor(0.0679)
epoch : 4 loss : tensor(0.0692)
epoch : 5 loss : tensor(0.0689)
epoch : 6 loss : tensor(0.0696)
epoch : 7 loss : tensor(0.0700)
epoch : 8 loss : tensor(0.0698)
epoch : 9 loss : tensor(0.0708)
epoch : 10 loss : tensor(0.0704)


In [18]:
#testing with RBM
test_loss = 0
s = 0.
for id_user in range(nb_users):
    v = training_set[id_user:id_user+1]
    vt = test_set[id_user:id_user+1]
    if len(vt[vt>=0]) > 0:
        _, h = rbm.sample_h(v)
        _, v = rbm.sample_v(h)
        test_loss += torch.mean(torch.abs(vt[vt>=0] - v[vt>=0]))
        s += 1.
print('test loss : '+str(test_loss/s))  

test loss : tensor(0.0580)
