# Import Libraries

In [11]:
import warnings
warnings.filterwarnings(action='ignore')

In [12]:
import numpy as np
import pandas as pd
import torch 
import torch.nn as nn
import torch.nn.parallel
import torch.optim as optim
import torch.utils.data
from torch.autograd import Variable

# Load the dataset

In [22]:
movies = pd.read_csv('ml-1m/movies.dat', sep='::', encoding='latin-1', header=None)

In [23]:
movies.head()

Unnamed: 0,0,1,2
0,1,Toy Story (1995),Animation|Children's|Comedy
1,2,Jumanji (1995),Adventure|Children's|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama
4,5,Father of the Bride Part II (1995),Comedy


In [31]:
movies.shape

(3883, 3)

In [20]:
users = pd.read_csv('ml-1m/users.dat', sep='::', encoding='latin-1', header=None)

In [21]:
users.head()

Unnamed: 0,0,1,2,3,4
0,1,F,1,10,48067
1,2,M,56,16,70072
2,3,M,25,15,55117
3,4,M,45,7,2460
4,5,M,25,20,55455


In [30]:
users.shape

(6040, 5)

In [24]:
ratings = pd.read_csv('ml-1m/ratings.dat', sep='::', encoding='latin-1', header=None)

In [25]:
ratings.head()

Unnamed: 0,0,1,2,3
0,1,1193,5,978300760
1,1,661,3,978302109
2,1,914,3,978301968
3,1,3408,4,978300275
4,1,2355,5,978824291


# prepare the training and test dataset

In [63]:
training_set = pd.read_csv('ml-100k/u1.base', delimiter='\t', header=None)

In [64]:
training_set.shape

(80000, 4)

In [65]:
training_set

Unnamed: 0,0,1,2,3
0,1,1,5,874965758
1,1,2,3,876893171
2,1,3,4,878542960
3,1,4,3,876893119
4,1,5,3,889751712
...,...,...,...,...
79995,943,1067,2,875501756
79996,943,1074,4,888640250
79997,943,1188,3,888640250
79998,943,1228,3,888640275


In [66]:
test_set = pd.read_csv('ml-100k/u1.test', delimiter='\t', header=None)

In [67]:
test_set.head()

Unnamed: 0,0,1,2,3
0,1,6,5,887431973
1,1,10,3,875693118
2,1,12,5,878542960
3,1,14,5,874965706
4,1,17,3,875073198


In [68]:
n_users = max(training_set.iloc[:,0].max(), test_set.iloc[:,0].max())

In [69]:
n_movies = max(training_set.iloc[:,1].max(), test_set.iloc[:,1].max())

In [75]:
n_users, n_movies

(943, 1682)

## Convert the data into an array with users in row and movies in columns

In [70]:
def prepare_data(data):
    new_data=[]
    for id_users in range(1, n_users+1):
        id_movies = data.iloc[:,1][data.iloc[:,0] == id_users]
        id_ratings = data.iloc[:,2][data.iloc[:,0] == id_users]
        ratings=np.zeros(n_movies)
        ratings[id_movies-1] = id_ratings
        new_data.append(list(ratings))
    return new_data

In [71]:
training_set = np.array(prepare_data(training_set))

In [72]:
training_set.shape

(943, 1682)

In [73]:
test_set = np.array(prepare_data(test_set))

In [74]:
test_set.shape

(943, 1682)

## Convert data into Torch Tensors

In [76]:
training_set = torch.FloatTensor(training_set)
test_set = torch.FloatTensor(test_set)

## Convert the data into binary ratings 1 (liked), 0(Not Liked)

In [77]:
training_set[training_set == 0] = -1
training_set[training_set == 1] = 0
training_set[training_set == 2] = 0
training_set[training_set >= 3] = 1

test_set[test_set == 0] = -1
test_set[test_set == 1] = 0
test_set[test_set == 2] = 0
test_set[test_set >= 3] = 1

In [130]:
class RBM:
    
    ## To initialize random weights and bias for the RBM
    def __init__(self, nv, nh):
        ## Random eights for thr RBM
        self.W = torch.randn(nh, nv)
        ## Random bais for hidden layer
        self.a = torch.randn(1, nh)
        ## Random bais for visible layer
        self.b = torch.randn(1, nv)
    
    def sample_h(self, x):
        '''This function will calculate information in hidden layer by using given data at visible layer'''
        ## calculating the info for hidden layer from given input in visible layer
        ## matrix multiplication of input x and weight metrix w
        wx = torch.mm(x, self.W.t())
        ## adding bais to the wx
        activation = wx + self.a.expand_as(wx)
        ## applying activation function to the calculated values sigmoid(w*X +b)
        p_h_given_v = torch.sigmoid(activation)
        ## returning sigmoid activated values(probability) and bernoulli values(0,1)
        return p_h_given_v, torch.bernoulli(p_h_given_v)
    
    def sample_v(self, y):
        '''This function will calculate regenerated visible layer's information by using 
        hidden layer's information
        
        y = sigmoid(w*X +b)
        
        '''
        wy = torch.mm(y, self.W)
        activation = wy + self.b.expand_as(wy)
        p_v_given_h = torch.sigmoid(activation)
        
        return p_v_given_h, torch.bernoulli(p_v_given_h)
    
    def train(self, v0, vk, ph0, phk):
        
        '''This function will update weights and biases to generate more accurate data at visible layer'''
        
        self.W += (torch.mm(v0.t(), ph0) - torch.mm(vk.t(), phk)).t()
        self.b += torch.sum((v0-vk), 0)
        self.a += torch.sum((ph0-phk), 0)


In [131]:
training_set[0].shape

torch.Size([1682])

In [132]:
nv = 1682
nh = 100

In [133]:
rbm = RBM(nv, nh)

In [126]:
n_epochs = 50
batch_size = 100
for epoch in range(1, n_epochs+1):
    train_loss = 0
    s = 0
    for id_user in range(0, n_users-batch_size, batch_size):
        ## v0 is original information available at visible layer
        v0 = training_set[id_user:id_user+batch_size]
        ## vk will be the regenrated information at visible layer
        vk = training_set[id_user:id_user+batch_size]
        
        ## visible to hidden, ph0 will be the initial hidden state of RBM, stored sigmoid info
        ph0,_ = rbm.sample_h(v0)
        
        ## visible to hidden, stored hidden bernoulli info
        _, hk = rbm.sample_h(vk)
        ## hidden to visible from bernoulli info of hidden, stored bernoulli info at visible as regenerated info
        _, vk = rbm.sample_v(hk)
        ## re assigining negaive values to regenerated info at visible layer as neither sigmoid nor bernoulli
        ## can return negative values
        vk[v0<0] = v0[v0<0]
        ## phk will be the k_th hidden state of RBM, stored sigmoid info
        phk, _ = rbm.sample_h(vk)
        ## update weights and baises of the network
        rbm.train(v0, vk, ph0, phk)
        ## calculate the loss for current batch
        train_loss += torch.mean(torch.abs(v0[v0>0] - vk[v0>0]))
        ## count the batch
        s+=1
        
    print('epoch: {}, loss: {}'.format(epoch, train_loss/s))

epoch: 1, loss: 0.2955971956253052
epoch: 2, loss: 0.15125542879104614
epoch: 3, loss: 0.15376204252243042
epoch: 4, loss: 0.14882618188858032
epoch: 5, loss: 0.1487060934305191
epoch: 6, loss: 0.14841778576374054
epoch: 7, loss: 0.15111255645751953
epoch: 8, loss: 0.13893479108810425
epoch: 9, loss: 0.14096790552139282
epoch: 10, loss: 0.14620280265808105
epoch: 11, loss: 0.14866237342357635
epoch: 12, loss: 0.1470043957233429
epoch: 13, loss: 0.14584265649318695
epoch: 14, loss: 0.14880329370498657
epoch: 15, loss: 0.1443464457988739
epoch: 16, loss: 0.14913347363471985
epoch: 17, loss: 0.147862046957016
epoch: 18, loss: 0.14702129364013672
epoch: 19, loss: 0.14766325056552887
epoch: 20, loss: 0.14735519886016846
epoch: 21, loss: 0.14749248325824738
epoch: 22, loss: 0.1443747878074646
epoch: 23, loss: 0.14879004657268524
epoch: 24, loss: 0.14769099652767181
epoch: 25, loss: 0.14751321077346802
epoch: 26, loss: 0.15104565024375916
epoch: 27, loss: 0.14286895096302032
epoch: 28, loss: 

In [117]:
test_set.shape

torch.Size([943, 1682])

In [119]:
ph, h = rbm.sample_h(test_set)

In [121]:
ph, v_reg = rbm.sample_v(h)

In [127]:
v_reg[test_set<0] = test_set[test_set<0]

In [128]:
test_set

tensor([[-1., -1., -1.,  ..., -1., -1., -1.],
        [-1., -1., -1.,  ..., -1., -1., -1.],
        [-1., -1., -1.,  ..., -1., -1., -1.],
        ...,
        [-1., -1., -1.,  ..., -1., -1., -1.],
        [-1., -1., -1.,  ..., -1., -1., -1.],
        [-1., -1., -1.,  ..., -1., -1., -1.]])

In [134]:
v_reg

tensor([[-1., -1., -1.,  ..., -1., -1., -1.],
        [-1., -1., -1.,  ..., -1., -1., -1.],
        [-1., -1., -1.,  ..., -1., -1., -1.],
        ...,
        [-1., -1., -1.,  ..., -1., -1., -1.],
        [-1., -1., -1.,  ..., -1., -1., -1.],
        [-1., -1., -1.,  ..., -1., -1., -1.]])