# Restricted Boltzmann Machine

A recommender system for movies, built using a Restricted Boltzmann Machine (RBM) that is trained on the [MovieLens](https://grouplens.org/datasets/movielens/) dataset. This model was built as part of a workshop for a Udemy course called [Deep Learning A-Z](https://www.udemy.com/course/deeplearning/learn/lecture/6895718). 

<br></br>
***
## Libraries

In [3]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.optim as optim
import torch.utils.data
from torch.autograd import Variable

<br></br>
***
## Data Preprocessing

In [4]:
# Dataset
movies = pd.read_csv('ml-1m/movies.dat',
                     sep = '::',                # Data separated by '::'
                     header = None,             # No column headers
                     engine = 'python',         # Use python's built-in CSV parser
                     encoding = 'latin-1')      # Character encoding
users = pd.read_csv('ml-1m/users.dat', sep = '::', header = None, engine = 'python', encoding = 'latin-1')
ratings = pd.read_csv('ml-1m/ratings.dat', sep = '::', header = None, engine = 'python', encoding = 'latin-1')

# Training data
# The u1.base / u1.test files are tab-separated and have no header row.
# Without `header = None` pandas would consume the first rating of each file
# as column names and silently drop it from the data.
trainingSet = pd.read_csv('ml-100k/u1.base', delimiter = '\t', header = None)
trainingSet = np.array(trainingSet, dtype = 'int')
testSet = pd.read_csv('ml-100k/u1.test', delimiter = '\t', header = None)
testSet = np.array(testSet, dtype = 'int')

In [5]:
# Work out how many users and movies exist.
# Ids are 1-based, so the largest id seen across the training and test sets
# is also the count.
userIds = (trainingSet[:, 0], testSet[:, 0])     # Users are listed in the 1st column
movieIds = (trainingSet[:, 1], testSet[:, 1])    # Movies are listed in the 2nd column
numUsers = int(max(column.max() for column in userIds))
numMovies = int(max(column.max() for column in movieIds))
print("Number of users: ", numUsers)
print("Number of movies: ", numMovies)

# Convert the data into a grid of users and their ratings for each movie
def convert(data, nbUsers=None, nbMovies=None):
    """Turn a (user id, movie id, rating, ...) table into a user x movie grid.

    Args:
        data: 2-D NumPy integer array whose first three columns are the
            1-based user id, the 1-based movie id and the rating.
        nbUsers: Number of rows in the grid. Defaults to the module-level
            ``numUsers`` when omitted.
        nbMovies: Number of columns in the grid. Defaults to the module-level
            ``numMovies`` when omitted.

    Returns:
        A list with one entry per user (ids 1..nbUsers, in order). Each entry
        is a list of ``nbMovies`` ratings, with 0 for movies the user did not
        rate.
    """
    if nbUsers is None:
        nbUsers = numUsers
    if nbMovies is None:
        nbMovies = numMovies

    newData = []
    for idUser in range(1, nbUsers + 1):
        # Select this user's rows once and reuse the mask for both columns
        userRows = data[:, 0] == idUser
        idMovies = data[userRows, 1]
        idRatings = data[userRows, 2]

        ratings = np.zeros(nbMovies)
        ratings[idMovies - 1] = idRatings   # Movie ids are 1-based
        newData.append(list(ratings))
    return newData

# Build the user x movie grids and wrap them as float tensors
trainingSet = torch.FloatTensor(convert(trainingSet))
testSet = torch.FloatTensor(convert(testSet))

# Convert ratings to binary values (0:Disliked 1:Liked); unrated movies become -1.
# This step is necessary as the trained model will only suggest which movies a person may like and
# not how much they may like each movie
for _grid in (trainingSet, testSet):
    # Order matters: the 0 -> -1 remap must happen before ratings 1 and 2
    # are mapped onto 0, otherwise "unrated" and "disliked" would collide.
    _grid[_grid == 0] = -1
    _grid[(_grid == 1) | (_grid == 2)] = 0
    _grid[_grid >= 3] = 1
del _grid

Number of users:  943
Number of movies:  1682


<br></br>
***
## RBM Architecture

This model utilises a Bernoulli RBM - a type of RBM where both the visible and hidden units are binary, meaning that their activation states can take on values of either 0 or 1. Bernoulli RBMs are commonly used in modeling binary data, such as binary images, binary text data, or binary feature vectors.


In [6]:
class RBM:
    """Bernoulli Restricted Boltzmann Machine trained with contrastive divergence.

    The parameters are plain tensors that ``train`` updates in place:
    ``W`` has shape (nh, nv), ``a`` is the hidden bias with shape (1, nh) and
    ``b`` is the visible bias with shape (1, nv).
    """

    # nv : Number of visible nodes
    # nh : Number of hidden nodes
    def __init__(self, nv, nh):
        # Randomly initialise weight tensor of size, nh x nv
        self.W = torch.randn(nh, nv)

        # Randomly initialise bias of the hidden nodes (used for p(h | v))
        # NOTE: 2d tensor. 1st dimension is the batch.
        self.a = torch.randn(1, nh)

        # Randomly initialise bias of the visible nodes (used for p(v | h))
        self.b = torch.randn(1, nv)

    def sampleHiddenNodes(self, x):
        """Sample the hidden layer given a batch of visible vectors.

        x: tensor of shape (batch, nv).
        Returns (p(h = 1 | v), Bernoulli sample of h), both (batch, nh).
        """
        # Product of weights x neurons
        wx = torch.mm(x, self.W.t())

        # Add the hidden bias, broadcast over the batch
        activation = wx + self.a.expand_as(wx)

        probabilityHGivenV = torch.sigmoid(activation)

        return probabilityHGivenV, torch.bernoulli(probabilityHGivenV)

    def sampleVisibleNodes(self, y):
        """Sample the visible layer given a batch of hidden vectors.

        y: tensor of shape (batch, nh).
        Returns (p(v = 1 | h), Bernoulli sample of v), both (batch, nv).
        """
        # Product of weights x neurons
        wy = torch.mm(y, self.W)

        # Add the visible bias, broadcast over the batch
        activation = wy + self.b.expand_as(wy)

        probabilityVGivenH = torch.sigmoid(activation)

        return probabilityVGivenH, torch.bernoulli(probabilityVGivenH)

    # Contrastive divergence
    # v0: Input vector of movie ratings for a user
    # vk: Visible nodes obtained after 'k' iterations
    # ph0: Probabilities of hidden nodes given v0
    # phk: Probabilities of hidden nodes after 'k' iterations
    # lr: Step size applied to every update (1.0 keeps the original behaviour)
    def train(self, v0, vk, ph0, phk, lr=1.0):
        """Apply one contrastive-divergence update to W, a and b in place."""
        # The products are (nv, nh); transpose to match W's (nh, nv) layout
        self.W += lr * (torch.mm(v0.t(), ph0) - torch.mm(vk.t(), phk)).t()
        # Bias updates are summed over the batch dimension
        self.b += lr * torch.sum((v0 - vk), 0)
        self.a += lr * torch.sum((ph0 - phk), 0)
        

<br></br>
***
## Training

In [10]:
nv = len(trainingSet[0])    # Visible nodes: length of one user's rating row
nh = 100                    # Hidden nodes
batchSize = 100             # Users per weight update
rbm = RBM(nv, nh)

# Training the RBM
nbEpoch = 10
for epoch in range(1, nbEpoch + 1):
    trainLoss = 0
    s = 0.      # Number of batches processed; used to average the loss

    # Step over ALL users. Slicing clamps at the end of the tensor, so the
    # final batch is simply shorter when numUsers is not a multiple of
    # batchSize. (Stopping at numUsers - batchSize would never train on the
    # trailing users.)
    for idUser in range(0, numUsers, batchSize):
        v0 = trainingSet[idUser:idUser+batchSize]   # Original ratings
        vk = v0                                     # Start of the Gibbs chain; rebound below
        unrated = v0 < 0
        rated = v0 >= 0
        ph0,_ = rbm.sampleHiddenNodes(v0)

        # 10 steps of Gibbs sampling
        for k in range(10):
            _,hk = rbm.sampleHiddenNodes(vk)
            _,vk = rbm.sampleVisibleNodes(hk)
            # Keep unrated entries (-1) fixed so they are excluded from v0 - vk
            vk[unrated] = v0[unrated]

        phk,_ = rbm.sampleHiddenNodes(vk)
        rbm.train(v0, vk, ph0, phk)
        # Mean absolute error on the movies the users actually rated
        trainLoss += torch.mean(torch.abs(v0[rated] - vk[rated]))
        s += 1.

    print('epoch: '+str(epoch)+' loss: '+str(trainLoss/s))

# Testing the RBM
# For each user, take one hidden -> visible sampling step from their training
# ratings and compare the result against the ratings held out in the test set.
testLoss = 0
s = 0.

for idUser in range(numUsers):
    vt = testSet[idUser:idUser+1]
    ratedInTest = vt >= 0

    # Users with no rated movies in the test set contribute nothing
    if not ratedInTest.any():
        continue

    v = trainingSet[idUser:idUser+1]
    _,h = rbm.sampleHiddenNodes(v)
    _,v = rbm.sampleVisibleNodes(h)
    testLoss += torch.mean(torch.abs(vt[ratedInTest] - v[ratedInTest]))
    s += 1.

averageTestLoss = testLoss/s
print('test loss: '+str(averageTestLoss))

epoch: 1 loss: tensor(0.3646)
epoch: 2 loss: tensor(0.2548)
epoch: 3 loss: tensor(0.2396)
epoch: 4 loss: tensor(0.2535)
epoch: 5 loss: tensor(0.2502)
epoch: 6 loss: tensor(0.2457)
epoch: 7 loss: tensor(0.2493)
epoch: 8 loss: tensor(0.2495)
epoch: 9 loss: tensor(0.2497)
epoch: 10 loss: tensor(0.2442)
test loss: tensor(0.2672)
