In [44]:
import torch
import numpy as np
import pandas as pd
import torch.nn as nn

### Reading Inputs

In [45]:
movies = pd.read_csv('ml-1m/movies.dat',sep='::',header=None,engine='python')
users = pd.read_csv('ml-1m/users.dat',sep='::',header=None,engine='python')

In [46]:
movies.sample(2)

Unnamed: 0,0,1,2
2207,2276,"Soldier's Daughter Never Cries, A (1998)",Drama
2876,2945,Mike's Murder (1984),Mystery


In [47]:
users.sample(2)

Unnamed: 0,0,1,2,3,4
3100,3101,M,18,15,91101
5618,5619,F,18,4,22310


### Reading Trainning Set

In [48]:
trainning_set = pd.read_csv('ml-100k/u1.base',sep='\t',header=None)
test_set = pd.read_csv('ml-100k/u1.test',sep='\t',header=None)

In [49]:
trainning_set.sample(2)

Unnamed: 0,0,1,2,3
37839,510,325,1,887667575
47369,615,582,3,879447968


In [50]:
test_set.sample(2)

Unnamed: 0,0,1,2,3
4363,91,134,4,891439353
12658,276,268,4,877584085


### Need to construct matrix of size num_users x num_movies

In [51]:
trainning_set = np.array(trainning_set)
test_set = np.array(test_set)

In [52]:
no_of_users = np.max(trainning_set[:,0])
no_of_movies = np.max(trainning_set[:,1])

In [53]:
def build_matrix(input_data):

    master_matrix = []
    for i in range(1,no_of_users+1):
        row = np.zeros(no_of_movies)
        movies = input_data[:,1][input_data[:,0] == i]
        ratings = input_data[:,2][input_data[:,0] == i]
        row[movies -1] = ratings
        master_matrix.append(row)
    return np.array(master_matrix)

In [54]:
trainning_set = build_matrix(trainning_set)

In [56]:
test_set = build_matrix(test_set)

### Converting ratings to binary value

In [57]:
trainning_set = torch.FloatTensor(trainning_set)
test_set = torch.FloatTensor(test_set)

In [59]:
type(test_set)

torch.Tensor

In [61]:
trainning_set[trainning_set == 0] = -1
trainning_set[trainning_set == 1] = 0
trainning_set[trainning_set == 2] = 0
trainning_set[trainning_set >= 3] = 1
test_set[test_set == 0] = -1
test_set[test_set == 1] = 0
test_set[test_set == 2] = 0
test_set[test_set >= 3] = 1

In [64]:
rand = torch.randn(1,4)
rand


tensor([[1.6335, 0.9508, 0.6856, 0.1522]])

In [70]:
rand1 = torch.sigmoid(rand)
rand1

tensor([[0.8367, 0.7213, 0.6650, 0.5380]])

In [74]:
torch.bernoulli(rand1)

tensor([[1., 0., 1., 1.]])

### Initializing RBM class

In [106]:
class RBM():
    
    def __init__(self,nv,nh):
        
        self.W = torch.randn(nh,nv)
        self.h_bias = torch.randn(1,nh)
        self.v_bias = torch.randn(1,nv)
        
    def hidden_pass(self,x):
        
        wx = torch.mm(x,self.W.t())
        add_bias = wx + self.h_bias.expand_as(wx)
        p_h_given_v = torch.sigmoid(add_bias)
        return p_h_given_v, torch.bernoulli(p_h_given_v) 
    
    def visible_pass(self,h):
        
        wx = torch.mm(h,self.W)
        add_bias = wx + self.v_bias.expand_as(wx)
        p_v_given_h = torch.sigmoid(add_bias)
        return p_v_given_h, torch.bernoulli(p_v_given_h)
    
    def train(self,h0,hk,v0,vk):
        
        self.W += (torch.mm(v0.t(),h0) - torch.mm(vk.t(),hk)).t()
        self.h_bias += torch.sum((h0 - hk), 0)
        self.v_bias += torch.sum((v0 - vk), 0)
    

In [107]:
nv = no_of_movies
nh = 100
bath_size = 100
rbm = RBM(nv,nh)

### Trainning RBM

In [109]:
for k in range(10):


    for id_user in range(0,no_of_users - bath_size,bath_size):

        loss = 0
        counter = 0
        v0 = trainning_set[id_user:id_user+bath_size]
        vk = v0
        h0,_ = rbm.hidden_pass(v0) 

        for i in range(20):

            _,hk = rbm.hidden_pass(vk)
            _,vk = rbm.visible_pass(hk)
            vk[v0<0] = v0[v0<0] #to avoid updating values without original rating

        hk,_ = rbm.hidden_pass(vk)

        rbm.train(h0,hk,v0,vk)
        loss += torch.mean(torch.abs(v0 - vk))
        counter += 1
    
    print("Epoc:{},Loss:{}".format(k,loss/counter))

Epoc:0,Loss:0.017966706305742264
Epoc:1,Loss:0.01577289029955864
Epoc:2,Loss:0.015909630805253983
Epoc:3,Loss:0.016361474990844727
Epoc:4,Loss:0.016361474990844727
Epoc:5,Loss:0.015612366609275341
Epoc:6,Loss:0.01705707423388958
Epoc:7,Loss:0.01503567211329937
Epoc:8,Loss:0.015588585287332535
Epoc:9,Loss:0.015332937240600586


### Testing RBM


In [137]:
test_loss = 0
s = 0.
for id_user in range(no_of_users):
    v = trainning_set[id_user:id_user+1]
    vt = test_set[id_user:id_user+1]
    if len(vt[vt>=0]) > 0:
        _,h = rbm.hidden_pass(v)
        _,v = rbm.visible_pass(h)
        test_loss += torch.mean(torch.abs(vt[vt>=0] - v[vt>=0]))
        s += 1.
print('test loss:'+str(test_loss/s))

test loss:tensor(0.2493)
