In [1]:
#### important libraries
import numpy as np 
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.optim as optim
import torch.utils.data
from torch.autograd import Variable

In [2]:
#### load the MovieLens 1M movie catalogue ('::'-separated, no header row)
movies = pd.read_csv(
    '../dataset/Boltzmann_Machines/ml-1m/movies.dat',
    sep='::',
    header=None,
    names=['movie_id', 'movie_name', 'movie_genre'],
    engine='python',      # multi-character separators need the python parser
    encoding='latin-1',
)
movies.head()

Unnamed: 0,movie_id,movie_name,movie_genre
0,1,Toy Story (1995),Animation|Children's|Comedy
1,2,Jumanji (1995),Adventure|Children's|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama
4,5,Father of the Bride Part II (1995),Comedy


In [3]:
#### load the MovieLens 1M user table ('::'-separated, no header row)
users = pd.read_csv(
    '../dataset/Boltzmann_Machines/ml-1m/users.dat',
    sep='::',
    header=None,
    names=['user_id', 'user_gender', 'user_age', 'job_id', 'zip_code'],
    engine='python',      # multi-character separators need the python parser
    encoding='latin-1',
)
users.head()

Unnamed: 0,user_id,user_gender,user_age,job_id,zip_code
0,1,F,1,10,48067
1,2,M,56,16,70072
2,3,M,25,15,55117
3,4,M,45,7,2460
4,5,M,25,20,55455


In [4]:
#### load the MovieLens 1M ratings table ('::'-separated, no header row)
ratings = pd.read_csv(
    '../dataset/Boltzmann_Machines/ml-1m/ratings.dat',
    sep='::',
    header=None,
    names=['user_id', 'movie_id', 'ratings', 'timestamps'],
    engine='python',      # multi-character separators need the python parser
    encoding='latin-1',
)
ratings.head()

Unnamed: 0,user_id,movie_id,ratings,timestamps
0,1,1193,5,978300760
1,1,661,3,978302109
2,1,914,3,978301968
3,1,3408,4,978300275
4,1,2355,5,978824291


In [5]:
#### preparing the training and test sets
# We do not split the data ourselves: the smaller 100k dataset (used here for
# educational purposes) already ships five pre-made 80/20 splits, u1..u5
# (.base = train, .test = test).

_ML100K_DIR = '../dataset/Boltzmann_Machines/ml-100k/'


def _load_split(path):
    """Read one tab-separated ml-100k split file into an int64 NumPy array.

    The u*.base / u*.test files contain no header row, so ``header=None`` is
    required: with the pandas default the first rating in every file is
    consumed as column names and silently dropped from the data.

    Parameters:
        path: location of the split file.

    Returns:
        2-D int64 array with one row per rating. Later cells read column 0 as
        the user id, column 1 as the movie id and column 2 as the rating.
    """
    return np.array(pd.read_csv(path, delimiter='\t', header=None), dtype='int64')


#### train
train1 = _load_split(_ML100K_DIR + 'u1.base')
train2 = _load_split(_ML100K_DIR + 'u2.base')
train3 = _load_split(_ML100K_DIR + 'u3.base')
train4 = _load_split(_ML100K_DIR + 'u4.base')
train5 = _load_split(_ML100K_DIR + 'u5.base')

#### test
test1 = _load_split(_ML100K_DIR + 'u1.test')
test2 = _load_split(_ML100K_DIR + 'u2.test')
test3 = _load_split(_ML100K_DIR + 'u3.test')
test4 = _load_split(_ML100K_DIR + 'u4.test')
test5 = _load_split(_ML100K_DIR + 'u5.test')


In [6]:
#### largest user id / movie id seen across every train and test split
all_splits = (train1, train2, train3, train4, train5,
              test1, test2, test3, test4, test5)

# column 0 holds user ids, column 1 holds movie ids
nb_users = int(max(split[:, 0].max() for split in all_splits))
nb_movies = int(max(split[:, 1].max() for split in all_splits))

print('number of users: ', nb_users)
print('number of movies: ', nb_movies)

number of users:  943
number of movies:  1682


In [7]:
#### converting the data into a users x movies layout (users in rows, movies in columns)
#### result is a list of lists: one inner list per user holding that user's rating for every movie

def convert_tolist(data, n_users=None, n_movies=None):
    """Pivot a ratings table into a dense per-user list of movie ratings.

    Parameters:
        data: 2-D integer array; column 0 is the 1-based user id, column 1 the
            1-based movie id and column 2 the rating. Further columns are ignored.
        n_users: number of rows to produce (user ids 1..n_users). Defaults to
            the notebook-level ``nb_users``.
        n_movies: number of columns to produce (movie ids 1..n_movies). Defaults
            to the notebook-level ``nb_movies``.

    Returns:
        A list with ``n_users`` entries; entry ``u - 1`` is a list of
        ``n_movies`` floats where position ``m - 1`` holds user ``u``'s rating
        of movie ``m``, or 0 when that user has no rating for it.
    """
    # fall back to the notebook globals only when the caller gave no explicit size
    if n_users is None:
        n_users = nb_users
    if n_movies is None:
        n_movies = nb_movies

    data = np.asarray(data)
    ratings_matrix = np.zeros((n_users, n_movies))

    if data.size:
        user_ids = data[:, 0]
        # keep only rows whose user id is in 1..n_users (other rows are ignored)
        in_range = (user_ids >= 1) & (user_ids <= n_users)
        rows = data[in_range]
        # ids start at 1 whereas array indices start at 0, hence the "- 1";
        # a single scatter replaces one boolean scan of `data` per user
        ratings_matrix[rows[:, 0] - 1, rows[:, 1] - 1] = rows[:, 2]

    return [list(user_row) for user_row in ratings_matrix]

# pivot every training split into its users x movies list-of-lists form
X_train1, X_train2, X_train3, X_train4, X_train5 = [
    convert_tolist(split) for split in (train1, train2, train3, train4, train5)
]

# same for the matching test splits
X_test1, X_test2, X_test3, X_test4, X_test5 = [
    convert_tolist(split) for split in (test1, test2, test3, test4, test5)
]

In [9]:
#### converting the nested rating lists into Torch float tensors
X_train1, X_train2, X_train3, X_train4, X_train5 = map(
    torch.FloatTensor, (X_train1, X_train2, X_train3, X_train4, X_train5)
)

X_test1, X_test2, X_test3, X_test4, X_test5 = map(
    torch.FloatTensor, (X_test1, X_test2, X_test3, X_test4, X_test5)
)

In [11]:
#### converting the ratings into binary ratings : Liked (1) and Not Liked (0)

# Every movie is kept for every user, whether or not the user rated it: the
# Boltzmann machine is meant to predict a rating for the missing movies.
#
# Because the ratings become binary, 0 can no longer mean "missing", so:
#   0 (no rating) -> -1 (missing)
#   1 or 2        ->  0 (not liked)
#   3 or more     ->  1 (liked)


def _binarize_ratings_(ratings):
    """Recode a ratings tensor in place to -1 (missing) / 0 (not liked) / 1 (liked).

    Parameters:
        ratings: tensor in which 0 marks "no rating" and positive values are
            the original ratings.

    Returns:
        The same tensor object, modified in place.

    The order of the assignments matters: missing entries must be moved to -1
    before 1 and 2 are mapped onto 0. For the same reason this is not
    idempotent - applying it twice to one tensor corrupts the encoding.
    """
    ratings[ratings == 0] = -1
    ratings[ratings == 1] = 0
    ratings[ratings == 2] = 0
    ratings[ratings >= 3] = 1
    return ratings


# Each tensor is masked with *its own* values. The pasted version masked
# X_test2 with X_test1, which left ratings 3-5 in X_test2 unconverted.
for _ratings in (X_train1, X_train2, X_train3, X_train4, X_train5,
                 X_test1, X_test2, X_test3, X_test4, X_test5):
    _binarize_ratings_(_ratings)

In [19]:
#### Creating the architecture of the Neural Network
class RBM:
    """Bernoulli Restricted Boltzmann Machine updated by contrastive divergence.

    Attributes:
        Weight: tensor of shape (num_hidden_nodes, num_visible_nodes).
        bias_visible_node: tensor of shape (1, num_visible_nodes).
        bias_hidden_node: tensor of shape (1, num_hidden_nodes).

    All three are initialised from a standard normal distribution. The leading
    dimension of 1 on the biases lets them broadcast over a batch of rows.
    """

    def __init__(self, num_visible_nodes, num_hidden_nodes):
        """Create the weight matrix and both bias rows with random normal values."""
        self.Weight = torch.randn(num_hidden_nodes, num_visible_nodes)
        self.bias_visible_node = torch.randn(1, num_visible_nodes)
        self.bias_hidden_node = torch.randn(1, num_hidden_nodes)

    def sample_hidden(self, x):
        """Sample the hidden units given a batch of visible units.

        Parameters:
            x: 2-D tensor (batch, num_visible_nodes) of visible values.

        Returns:
            (p_h_given_v, sample): the activation probability of every hidden
            unit, shape (batch, num_hidden_nodes), and a 0/1 tensor of the same
            shape drawn from a Bernoulli distribution with those probabilities.
        """
        # (batch, visible) x (visible, hidden) -> (batch, hidden); hence the transpose
        pre_activation = x.mm(self.Weight.t())
        # the single bias row broadcasts across every row of the batch
        pre_activation = pre_activation + self.bias_hidden_node
        prob_hidden = torch.sigmoid(pre_activation)
        # each unit fires with its own probability - there is no fixed threshold
        hidden_sample = torch.bernoulli(prob_hidden)
        return prob_hidden, hidden_sample

    def sample_visible(self, y):
        """Sample the visible units given a batch of hidden units.

        Parameters:
            y: 2-D tensor (batch, num_hidden_nodes) of hidden values.

        Returns:
            (p_v_given_h, sample): the activation probability of every visible
            unit, shape (batch, num_visible_nodes), and a 0/1 Bernoulli sample
            of the same shape.
        """
        # (batch, hidden) x (hidden, visible) -> (batch, visible); no transpose needed
        pre_activation = y.mm(self.Weight)
        pre_activation = pre_activation + self.bias_visible_node
        prob_visible = torch.sigmoid(pre_activation)
        visible_sample = torch.bernoulli(prob_visible)
        return prob_visible, visible_sample

    def train(self, v0, vk, ph0, phk):
        """Apply one contrastive-divergence update to the weights and biases in place.

        Parameters:
            v0: (batch, num_visible_nodes) original visible values.
            vk: (batch, num_visible_nodes) visible values after k sampling steps.
            ph0: (batch, num_hidden_nodes) hidden probabilities given v0.
            phk: (batch, num_hidden_nodes) hidden probabilities given vk.
        """
        data_term = torch.mm(v0.t(), ph0)    # (visible, hidden)
        model_term = torch.mm(vk.t(), phk)   # (visible, hidden)
        # transpose back to the (hidden, visible) layout used by Weight
        self.Weight += (data_term - model_term).t()

        # summing over dim 0 collapses the batch; the resulting 1-D tensor
        # broadcasts onto the (1, n) bias rows
        self.bias_visible_node += (v0 - vk).sum(0)
        self.bias_hidden_node += (ph0 - phk).sum(0)

In [23]:
#### variable assignment

# number of visible nodes = number of movies (length of one user's rating row)
nv = len(X_train1[0])

# number of hidden nodes (how many hidden features to learn) - arbitrary, can be tuned further
nh = 100

# number of users fed through the RBM per weight update
batch_size = 100

# Seed torch's RNG so the random weight initialisation and the Bernoulli
# sampling give the same results on every Restart & Run All.
torch.manual_seed(0)

# RBM object
rbm = RBM(nv, nh)

#### training the RBM

# number of epochs
nb_epoch = 20

# training loop
for epoch in range(1, nb_epoch + 1):
    # running sum of the per-batch losses and the number of batches seen
    training_loss = 0
    count = 0.0

    # Users are passed in batches, not one at a time. The range runs up to
    # nb_users (not nb_users - batch_size), so the last batch is no longer
    # skipped; slicing clamps at the end of the tensor, which simply makes the
    # final batch shorter when nb_users is not a multiple of batch_size.
    for users_id in range(0, nb_users, batch_size):
        # v0 - the original ratings of this batch of users (never modified)
        # vk - visible nodes after k Gibbs steps; starts out equal to v0
        # ph0 - hidden probabilities given v0
        vk = X_train1[users_id:users_id+batch_size]
        v0 = X_train1[users_id:users_id+batch_size]
        ph0,_ = rbm.sample_hidden(v0)

        # k = 10 steps of contrastive divergence: visible -> hidden -> visible
        for k in range(10):
            _,hk = rbm.sample_hidden(vk)
            _,vk = rbm.sample_visible(hk)

            # missing ratings (-1) are not learned from: put them back after each step
            vk[v0<0] = v0[v0<0]

        # phk - hidden probabilities given the final vk
        phk,_ = rbm.sample_hidden(vk)

        # update weights and biases from the start and end of the chain
        rbm.train(v0, vk, ph0, phk )

        # mean absolute difference over the entries that were actually rated
        training_loss += torch.mean(torch.abs(v0[v0>=0] - vk[v0>=0]))
        count += 1

    # average loss per batch for this epoch
    print('epoch: '+str(epoch)+ ' loss: '+str(training_loss/count))

epoch: 1 loss: tensor(0.3231)
epoch: 2 loss: tensor(0.2338)
epoch: 3 loss: tensor(0.2461)
epoch: 4 loss: tensor(0.2492)
epoch: 5 loss: tensor(0.2486)
epoch: 6 loss: tensor(0.2476)
epoch: 7 loss: tensor(0.2447)
epoch: 8 loss: tensor(0.2502)
epoch: 9 loss: tensor(0.2455)
epoch: 10 loss: tensor(0.2492)
epoch: 11 loss: tensor(0.2452)
epoch: 12 loss: tensor(0.2476)
epoch: 13 loss: tensor(0.2471)
epoch: 14 loss: tensor(0.2493)
epoch: 15 loss: tensor(0.2475)
epoch: 16 loss: tensor(0.2494)
epoch: 17 loss: tensor(0.2451)
epoch: 18 loss: tensor(0.2467)
epoch: 19 loss: tensor(0.2470)
epoch: 20 loss: tensor(0.2479)


In [26]:
#### testing

# running sum of per-user losses and the number of users that were scored
test_loss = 0
count = 0.0

# users are evaluated one at a time (batch size 1)
for users_id in range(0, nb_users):

    # v  - the user's training ratings, used to activate the hidden neurons
    # vt - the user's held-out test ratings, used as the target
    v = X_train1[users_id:users_id+1]
    vt = X_test1[users_id:users_id+1]

    # only users with at least one rated (non-negative) test entry are scored
    rated = vt >= 0
    if not bool(rated.any()):
        continue

    # a single visible -> hidden -> visible pass is used for prediction
    _,h = rbm.sample_hidden(v)
    _,v = rbm.sample_visible(h)

    # mean absolute difference over the rated test entries only
    test_loss += torch.abs(vt - v)[rated].mean()
    count += 1

# average loss over the scored users
print('test_loss: '+str(test_loss/count))

test_loss: tensor(0.2460)
