In [0]:
#Binary Classification of Movie of recommender System Using Boltzmann Machine
from google.colab import files
files.upload();

# Importing DATA

In [0]:
!wget http://files.grouplens.org/datasets/movielens/ml-1m.zip
!wget http://files.grouplens.org/datasets/movielens/ml-100k.zip

In [0]:
!ls
!unzip ml-1m.zip
!unzip ml-100k.zip

In [0]:
#Basic Library
import pandas as pd
import numpy as np

#Pytorch Library
import torch
import torch.nn as nn #neural network
import torch.nn.parallel
import torch.optim as optim #optimizer
import torch.utils.data
from torch.autograd import variable #for stochastic gradient descent

# DATA PREPROCESSING

**Importing data Explanation** 
The  file is dat
1)Seperator is :: not comma as movie file name can contain comma
2) header = None because nothing is specified as header default is infer means first row
3) encoding is latin-1 not utf8 

In [0]:
#importing the data set (.dat file not csv)
movies = pd.read_csv("ml-1m/movies.dat", header = None, encoding = 'latin-1' ,sep="::", engine = 'python')
users = pd.read_csv("ml-1m/users.dat", header = None, encoding = 'latin-1' ,sep="::", engine = 'python')
ratings = pd.read_csv("ml-1m/ratings.dat", header = None, encoding = 'latin-1' ,sep="::", engine = 'python')

In [0]:
movies.head()
users.head() 
ratings.head()

In [0]:
#preparing the training set and test set 80/20 split
training_set = pd.read_csv("ml-100k/u1.base", delimiter = '\t')

#pytorch does not work with dataframe ,it works only with array 
#converting dataframe to aray using numpy
training_set = np.array(training_set ,dtype='int')
test_set = pd.read_csv("ml-100k/u1.test", delimiter = '\t')
test_set = np.array(test_set ,dtype='int')

In [0]:
#Getting the number of users and movies to make a matrix where each cell is rating (u,m) u=user and m=movie
nb_users = int(max(max( training_set[:,0]),max(test_set[:,0])))
nb_movies = int(max(max( training_set[:,1]),max(test_set[:,1])))

In [0]:
#converting the data into array with users as lines and movies as column(specific structure to feed to nn)
def convert(data):
  new_data = []            #making list of lists rather than matrix
  for id_users in range(1, nb_users+1):
    id_movies = data[:,1][data[:,0]==id_users]
    id_rating = data[:,2][data[:,0]==id_users]
    ratings = np.zeros(nb_movies)
    ratings[id_movies-1]=id_rating
    new_data.append(list(ratings))
  return new_data
training_set = convert(training_set) #now it is list of lists 
test_set = convert(test_set)

In [0]:
#converting the data into Torch tensor (Multidimensional Matrix)
training_set = torch.FloatTensor(training_set)
test_set = torch.FloatTensor(test_set)

# Building a Boltzmann Machine

In [0]:
#converting the rating into binary rating(1 or 0) and -1 for not rated 
training_set[training_set == 0] = -1
training_set[training_set >= 3] = 1
training_set[training_set == 1] = 0
training_set[training_set == 2] = 0
test_set[test_set == 0] = -1
test_set[test_set >= 3] = 1
test_set[test_set == 1] = 0
test_set[test_set == 2] = 0

In [0]:
# Creating the architecture of the Neural Network
class RBM():
    # nv = visible nodes
    # nh = hidden nodes
    def __init__(self, nv, nh):
        # Initialize the weights - this consists of a matrix with the size of the hidden nodes and visible nodes
        self.W = torch.randn(nh, nv)
        # Initialize the bias and add a 2nd Dimension
        self.a = torch.randn(1, nh)
        self.b = torch.randn(1, nv)
    
    ## Sample the hidden nodes
    def sample_h(self, x):
        # Define product of the weights
        # .t = transpose which is used to make the equation mathematically correct
        wx = torch.mm(x, self.W.t())
        # expand_as = make the activation function the same Dimension for each mini-batch
        activation = wx + self.a.expand_as(wx)
        # Probability value given the visible nodes
        # Given the value of the visible nodes we return the probability of each of the hidden nodes = 1
        p_h_given_v = torch.sigmoid(activation)
        # Based on the probability, activate the hidden node
        return p_h_given_v, torch.bernoulli(p_h_given_v)
    
    ## Sample the visible nodes
    def sample_v(self, y):
        # Define product of the weights
        wy = torch.mm(y, self.W)
        # expand_as = make the activation function the same Dimension for each mini-batch
        activation = wy + self.b.expand_as(wy)
        # Probability value given the hidden nodes
        # Given the value of the hidden nodes we return the probability of each of the visible nodes = 1
        p_v_given_h = torch.sigmoid(activation)
        # Based on the probability, predict whether the user will like the movie or not
        return p_v_given_h, torch.bernoulli(p_v_given_h)
    
    ## Contrastive Divergence
    # v0 = Input vector, e.g. ratings of all the movies by one user
    # vk = Visible nodes after k sampling
    # ph0 = Vector of probabilities, at first iteration the hidden nodes = 1 given the values of v0
    # phk = Probabilities of the hidden nodes after k sampling
    def train(self, v0, vk, ph0, phk):
        # Updating the weights
        self.W += (torch.mm(v0.t(), ph0) - torch.mm(vk.t(), phk)).t()
        # The sum is used to keep the same dimensions of the bias
        self.b += torch.sum( (v0 - vk), 0 )
        self.a += torch.sum( (ph0 - phk), 0 )

nv = len(training_set[0])
nh = 100
batch_size = 100

rbm = RBM(nv,nh)

In [48]:
# Training the RBM
nb_epoch = 10
for epoch in range(1, nb_epoch + 1):
    ### Maximum Absolute Value
    train_loss = 0
    s = 0.
    ## Batch learning
    for id_user in range(0, nb_users - batch_size, batch_size):
        # Grabs the batch of units
        vk = training_set[id_user:id_user + batch_size]
        v0 = training_set[id_user:id_user + batch_size]
        # [variable name],_ = Only return first element of the function
        # Used to start the loop to make the Gibbs Chain for Gibbs Sampling
        ph0,_ = rbm.sample_h(v0)
        ## K-step Contrastive Divergence
        for k in range(10):
            # _,[variable name] = Only return second element of the function
            # returns sample of hidden nodes
            _,hk = rbm.sample_h(vk)
            # Returns sample of visible nodes
            _,vk = rbm.sample_v(hk)
            # Avoid using the ratings with -1 (Movies a user hasn't rated)
            vk[v0 < 0] = v0[v0 < 0]
        phk,_ = rbm.sample_h(vk)
        rbm.train(v0, vk, ph0, phk)
        ## Update the train loss
        # Identify the absolute value of the ratings that exist (that are not -1)
        train_loss += torch.mean(torch.abs(v0[v0 >= 0] - vk[v0 >= 0]))
        # Update the counter to normalize the train loss
        s += 1.
    print('epoch: ' + str(epoch) + ' loss: ' + str(train_loss/s))

epoch: 1 loss: tensor(0.2421)
epoch: 2 loss: tensor(0.2478)
epoch: 3 loss: tensor(0.2465)
epoch: 4 loss: tensor(0.2454)
epoch: 5 loss: tensor(0.2482)
epoch: 6 loss: tensor(0.2439)
epoch: 7 loss: tensor(0.2469)
epoch: 8 loss: tensor(0.2453)
epoch: 9 loss: tensor(0.2461)
epoch: 10 loss: tensor(0.2453)


In [52]:
test_loss = 0
s = 0.
for id_user in range(nb_users):
    v = training_set[id_user:id_user+1]
    vt = test_set[id_user:id_user+1]
    if len(vt[vt>=0]) > 0:
        _,h = rbm.sample_h(v)
        _,v = rbm.sample_v(h)
        vt[vt>=0]
        test_loss += torch.mean(torch.abs(vt[vt>=0] - v[vt>=0])) # Average Distance here
        s += 1.
print('test loss: '+str(test_loss/s))

test loss: tensor(0.2541)
