In [1]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.optim as optim
import torch.utils.data
from torch.autograd import Variable

In [2]:
movies = pd.read_csv('ml-1m/movies.dat',sep='::',header= None, engine = 'python',encoding='latin-1')

In [3]:
movies.head()

Unnamed: 0,0,1,2
0,1,Toy Story (1995),Animation|Children's|Comedy
1,2,Jumanji (1995),Adventure|Children's|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama
4,5,Father of the Bride Part II (1995),Comedy


In [4]:
users=pd.read_csv('ml-1m/users.dat',sep='::',header= None, engine = 'python',encoding='latin-1')

In [5]:
ratings=pd.read_csv('ml-1m/ratings.dat',sep='::',header= None, engine = 'python',encoding='latin-1')

In [6]:
ratings

Unnamed: 0,0,1,2,3
0,1,1193,5,978300760
1,1,661,3,978302109
2,1,914,3,978301968
3,1,3408,4,978300275
4,1,2355,5,978824291
...,...,...,...,...
1000204,6040,1091,1,956716541
1000205,6040,1094,5,956704887
1000206,6040,562,5,956704746
1000207,6040,1096,4,956715648


In [7]:
training_set=pd.read_csv('ml-100k/u1.base',delimiter='\t')

In [8]:
training_set=np.array(training_set,dtype='int')

In [9]:
test_set=pd.read_csv('ml-100k/u1.test',delimiter='\t')
test_set=np.array(test_set,dtype='int')

In [10]:
nb_users=int(max(max(training_set[:,0]),max(test_set[:,0])))

In [11]:
nb_movies=int(max(max(training_set[:,1]),max(test_set[:,1])))

In [12]:
def convert(data):
    new_data=[]
    for id_users in range(1,nb_users+1):
        id_movies = data[:,1][data[:,0]==id_users]
        id_ratings = data[:,2][data[:,0]==id_users]
        ratings=np.zeros(nb_movies)
        ratings[id_movies-1]=id_ratings
        new_data.append(list(ratings))
    return new_data

In [13]:
training_set=convert(training_set)
test_set=convert(test_set)

In [14]:
training_set=torch.FloatTensor(training_set)
test_set=torch.FloatTensor(test_set)

In [15]:
training_set[training_set==0] =-1

In [16]:
training_set[training_set==1] =0
training_set[training_set==2] =0
training_set[training_set>=3] =1

In [17]:
test_set[test_set==0] =-1
test_set[test_set==1] =0
test_set[test_set==2] =0
test_set[test_set>=3] =1

In [18]:
class RBM():
    def __init__(self,nv,nh):
        self.W= torch.randn(nh,nv)
        self.a= torch.randn(1,nh)
        self.b= torch.randn(1,nv)
    def sample_h(self,x):
        wx=torch.mm(x,self.W.t())
        activation =wx + self.a.expand_as(wx)
        p_h_given_v= torch.sigmoid(activation)
        return p_h_given_v, torch.bernoulli(p_h_given_v)
    def sample_v(self,y):
        wy=torch.mm(y,self.W)
        activation =wy + self.b.expand_as(wy)
        p_h_given_v= torch.sigmoid(activation)
        return p_h_given_v, torch.bernoulli(p_h_given_v)
    def train(self,v0,vk,ph0,phk):
        self.W +=(torch.mm(v0.t(),ph0)-torch.mm(vk.t(),phk)).t()
        self.b +=torch.sum((v0-vk),0)
        self.a +=torch.sum((ph0-phk),0)

In [19]:
nv = len(training_set[0])
nh = 100
batch_size = 100
rbm = RBM(nv,nh)

In [22]:
nb_epoch=10
for epoch in range(1,nb_epoch+1):
    train_loss = 0
    s = 0.
    for id_user in range(0,nb_users-batch_size,batch_size):
        vk=training_set[id_user:id_user+batch_size]
        v0=training_set[id_user:id_user+batch_size]
        ph0,_ = rbm.sample_h(v0)
        for k in range(10):
            _,hk = rbm.sample_h(vk)
            _,vk = rbm.sample_v(hk)
            vk[v0<0] = v0[v0<0]
        phk,_ = rbm.sample_h(vk)
        rbm.train(v0,vk,ph0,phk)
        train_loss += torch.mean(torch.abs(vk[v0>=0]-v0[v0>=0]))
        s += 1.
        print('epoch:'+str(epoch)+' loss:',str(train_loss/s))

epoch:1 loss: tensor(0.2634)
epoch:1 loss: tensor(0.2507)
epoch:1 loss: tensor(0.2446)
epoch:1 loss: tensor(0.2351)
epoch:1 loss: tensor(0.2378)
epoch:1 loss: tensor(0.2458)
epoch:1 loss: tensor(0.2442)
epoch:1 loss: tensor(0.2489)
epoch:1 loss: tensor(0.2496)
epoch:2 loss: tensor(0.2406)
epoch:2 loss: tensor(0.2409)
epoch:2 loss: tensor(0.2365)
epoch:2 loss: tensor(0.2327)
epoch:2 loss: tensor(0.2375)
epoch:2 loss: tensor(0.2457)
epoch:2 loss: tensor(0.2453)
epoch:2 loss: tensor(0.2475)
epoch:2 loss: tensor(0.2488)
epoch:3 loss: tensor(0.2547)
epoch:3 loss: tensor(0.2496)
epoch:3 loss: tensor(0.2375)
epoch:3 loss: tensor(0.2298)
epoch:3 loss: tensor(0.2376)
epoch:3 loss: tensor(0.2428)
epoch:3 loss: tensor(0.2435)
epoch:3 loss: tensor(0.2484)
epoch:3 loss: tensor(0.2468)
epoch:4 loss: tensor(0.2538)
epoch:4 loss: tensor(0.2410)
epoch:4 loss: tensor(0.2357)
epoch:4 loss: tensor(0.2348)
epoch:4 loss: tensor(0.2393)
epoch:4 loss: tensor(0.2443)
epoch:4 loss: tensor(0.2430)
epoch:4 loss: 

In [21]:
torch.randn(2,3)

tensor([[-0.5464,  0.3522,  1.6345],
        [-0.4690,  0.5350, -0.9883]])