### We build a Boltzmann Machine that predicts that if a user likes a movie or not

### Import required packages

In [1]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn #To implement Neural Networks
import torch.nn.parallel #For parallel computation
import torch.optim as optim #For Optimizers
import torch.utils.data #For tools
from torch.autograd import Variable #For Stochastic Gradient Descent

In [2]:
#Importing the dataset
movies = pd.read_csv('ml-1m/movies.dat', sep='::', header=None, engine='python', encoding='latin-1')

In [3]:
#checking the head
movies.head()

Unnamed: 0,0,1,2
0,1,Toy Story (1995),Animation|Children's|Comedy
1,2,Jumanji (1995),Adventure|Children's|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama
4,5,Father of the Bride Part II (1995),Comedy


In [4]:
users = pd.read_csv('ml-1m/users.dat', sep='::', header=None, engine='python', encoding='latin-1')

In [6]:
users.head() #ID, Gender, Age, somecode, digitcode

Unnamed: 0,0,1,2,3,4
0,1,F,1,10,48067
1,2,M,56,16,70072
2,3,M,25,15,55117
3,4,M,45,7,2460
4,5,M,25,20,55455


In [7]:
ratings = pd.read_csv('ml-1m/ratings.dat', sep='::', header=None, engine='python', encoding='latin-1')

In [9]:
ratings.head() #UserID, MoviesID, Rating, Timestamps(not required at all)

Unnamed: 0,0,1,2,3
0,1,1193,5,978300760
1,1,661,3,978302109
2,1,914,3,978301968
3,1,3408,4,978300275
4,1,2355,5,978824291


In [10]:
#Preparing Training set & Test set
training_set = pd.read_csv('ml-100k/u1.base', delimiter='\t') #u1.base means some dataset from the whole dataset

In [11]:
training_set.head()

Unnamed: 0,1,1.1,5,874965758
0,1,2,3,876893171
1,1,3,4,878542960
2,1,4,3,876893119
3,1,5,3,889751712
4,1,7,4,875071561


In [12]:
#We have to convert training_set to an array
training_set = np.array(training_set, dtype = 'int')

In [13]:
#Test set
test_set = pd.read_csv('ml-100k/u1.test', delimiter='\t') #u1.base means some dataset from the whole dataset
test_set = np.array(test_set, dtype = 'int')

In [18]:
#Getting the total number of users & movies
nb_users = int(max(max(training_set[:, 0]), max(test_set[:, 0])))
nb_movies = int(max(max(training_set[:, 1]), max(test_set[:, 1])))

In [19]:
print('Total Users:',nb_users)
print('Total Movies:', nb_movies)

Total Users: 943
Total Movies: 1682


In [24]:
#Converting the data into an array with users in lines and movies in cols
def convert(data):
    new_data = []
    for id_users in range(1,nb_users+1):
        movie_ids = data[:, 1][data[:,0] == id_users]
        ratings_ids = data[:, 2][data[:,0] == id_users]
        ratings = np.zeros(nb_movies)
        ratings[movie_ids - 1] = ratings_ids
        new_data.append(list(ratings))
        
    return new_data

In [25]:
training_set = convert(training_set)
test_set = convert(test_set)

In [26]:
#Converting the data into Torch tensors
training_set = torch.FloatTensor(training_set)#FloatTensor expects an arg with a lists of list
test_set = torch.FloatTensor(test_set)

In [28]:
training_set.shape

torch.Size([943, 1682])

In [30]:
#Convert the ratings into the binary ratings : (Liked-1) and (Disliked-0)
training_set[training_set == 0] = -1
training_set[training_set == 1] = 0 #rating=1
training_set[training_set == 2] = 0 #rating=2
training_set[training_set >= 3] = 1 #ratings 3,4,5

test_set[test_set == 0] = -1
test_set[test_set == 1] = 0 #rating=1
test_set[test_set == 2] = 0 #rating=2
test_set[test_set >= 3] = 1 #ratings 3,4,5

In [31]:
#Build an architecture of RBM(Remember that RBM is a probabilistic Graphical Model)
class RBM():
    def __init__(self, nb_visible, nb_hidden):
        self.W = torch.randn(nb_hidden, nb_visible) #initialize all the weights with size hidden,visible
        self.a = torch.randn(1, nb_hidden) #As, torch methods always expects args to not to be 1D...so we give 2D tensor with batchsize and hidden nodes
        self.b = torch.randn(1, nb_visible) #Bias for visible nodes
        
    def sample_hidden(self, x): #x - #visible neurons in the prob(H|v)
        wx = torch.nn(x, self.W.t()) #Product of x and Weights
        #activation fn = wx + bias(bias of hidden nodes)
        activation = wx + self.a.expand_nn(wx) #Represents probability that the hidden node activated
        p_h_given_v = torch.sigmoid(activation)
        return p_h_given_v, torch.bernoulli(p_h_given_v)
    
    def sample_visible(self, y): #x - #visible neurons in the prob(H|v)
        wy = torch.nn(y, self.W) #Product of x and Weights
        #activation fn = wy + bias(bias of visible nodes)
        activation = wy + self.b.expand_nn(wy) #Represents probability that the hidden node activated
        p_v_given_h = torch.sigmoid(activation)
        return p_v_given_h, torch.bernoulli(p_v_given_h)
    #Contrastive Divergence - to calculate likelihood gradient
     #v0-I/P vec, vk- visible node obtained after k-samping, vec of prob at 1st iter at hidden=1 given v0, phk-prob of hidden nodes after k-sampling
    def train(self, v0, vk, ph0, phk):
        self.W += torch.nn(v0.t(), ph0) - torch.nn(vk.t(), phk)
        self.b += torch.sum((v0 - vk), 0)
        self.a += torch.sum((ph0 - phk), 0)
        

In [34]:
#Creating an object to RBM class
nb_visible = len(training_set[0])
nb_hidden = 100 #no. of features
batch_size = 100
rbm = RBM(nb_visible, nb_hidden)

### Training the RBM


In [35]:
nb_epochs = 10

for epoch in range(1,nb_epochs+1):
    train_loss = 0
    s = 0. #Counter
    for user in range(0, nb_users - batch_size, batch_size):
        vk = training_set[user:user+batch_size]
        v0 = training_set[user:user+batch_size]
        #Initial probabilities
        ph0, _ = rbm.sample_hidden(v0) #'_' is used bcoz, we need only the 1st value returned y sample_hidden()
        #Gibb's Chain
        for k in range(10):
            