In [1]:
!pip install torch

Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)
Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch)
  Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl (731.7 MB)
Collecting nvidia-cublas-cu12==12.1.3.1 (from torch)
  Using cached nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl (410.6 MB)
Collecting nvidia-cufft-cu12==11.0.2.54 (from torch)
  Using cached nvidia_cufft_cu12-11.0.2.54-py3-none-manylinux1_x86_64.whl (121.6 MB)
Collecting nvidia-curand-cu12==10.3.2.106 (from torch)
  Using cached nvidia_curand_cu12-10.3.2.106-py3-none-manylinux1_x86_64.whl (56.5 MB)
Collectin

In [2]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn

In [3]:
movies = pd.read_csv("/content/drive/MyDrive/Boltzmann's Machine/Boltzmann_Machines/ml-1m/movies.dat",sep = "::", engine='python',encoding='latin-1',header=None)

In [4]:
ratings = pd.read_csv("/content/drive/MyDrive/Boltzmann's Machine/Boltzmann_Machines/ml-1m/ratings.csv",sep = '::',header = None , engine = 'python' , encoding = 'latin-1')

In [5]:
users = pd.read_csv("/content/drive/MyDrive/Boltzmann's Machine/Boltzmann_Machines/ml-1m/users.dat",sep = '::', header= None , engine = 'python' , encoding = 'latin-1')

In [18]:
training_set = pd.read_csv("/content/drive/MyDrive/Boltzmann's Machine/Boltzmann_Machines/ml-100k/u4.base" , delimiter='\t')
training_set = np.array(training_set)

In [19]:
test_set = pd.read_csv("/content/drive/MyDrive/Boltzmann's Machine/Boltzmann_Machines/ml-100k/u5.test" , delimiter = '\t')
test_set = np.array(test_set)

In [20]:
print(training_set[: , 0])

[  1   1   1 ... 943 943 943]


In [21]:
print(training_set[: , 1])

[   2    3    5 ... 1047 1228 1330]


In [22]:
nb_users = int(max(max(training_set[: , 0]),max(test_set[: , 0])))
print(nb_users)

943


In [23]:
nb_movies = int(max(max(training_set[: , 1]),max(test_set[: , 1])))
print(nb_movies)

1682


In [24]:
# Converting the train and test dataset into matrix where rows represent the users and
# column represent the features(movies) and cells represent the rating given by the users

def convert(data):
  new_data = []
  for id_user in range(1,nb_users+1):
    id_movie = data[: , 1][data[: , 0]==id_user]
    id_rating = data[: , 2][data[: , 0]==id_user]
    ratings = np.zeros(nb_movies)
    ratings[id_movie -1] = id_rating
    new_data.append(list(ratings))
  return new_data

In [25]:
train_set = convert(training_set)
test_set = convert(test_set)


In [26]:
len(train_set)  , len(test_set)

(943, 943)

In [27]:
len(train_set[0]) , len(test_set[0])

(1682, 1682)

In [28]:
train_set = torch.FloatTensor(train_set)
test_set = torch.FloatTensor(test_set)

In [29]:
train_set[train_set == 0] = -1
train_set[train_set == 1] = 0
train_set[train_set == 2] = 0
train_set[train_set >= 3] = 1

In [30]:
test_set[test_set == 0] = -1
test_set[test_set == 1] = 0
test_set[test_set == 2] = 0
test_set[test_set >= 3] = 1


In [38]:
class RBM():
  def __init__(self , nv,nh): #nv = no of visible nodes and nh = no of hidden nodes
    self.W = torch.randn(nh,nv) # weight probabilites of visible nodes when hidden node is given
    self.a = torch.randn(1,nh) # bias probability of hidden node and batch_size = 1
    self.b = torch.randn(1,nv) # bias probability of visible node and batch_size = 1

  def sample_h(self , x):
    wx = torch.mm(x , self.W.t())
    activation = wx + self.a.expand_as(wx)
    p_h_given_v = torch.sigmoid(activation) # Sigmoid Activation
    return p_h_given_v , torch.bernoulli(p_h_given_v)

  def sample_v(self,y):
    wy = torch.mm(y,self.W)
    activation = wy + self.b.expand_as(wy)
    p_v_given_h = torch.sigmoid(activation)
    return p_v_given_h , torch.bernoulli(p_v_given_h)
  def train(self, v0, vk, ph0, phk):
    self.W += torch.mm(ph0,v0) - torch.mm(phk , vk)
    self.b += torch.sum((v0 - vk), 0)
    self.a += torch.sum((ph0 - phk), 0)




In [39]:
nv = len(train_set[0])
nh = 150
print(nh) , print(nv)

150
1682


(None, None)

In [42]:
batch_size = 150
rbm = RBM(nv,nh)

In [43]:
nb_epoch = 15
for epoch in range(1,nb_epoch+1):
  train_loss = 0
  s = 0.
  for id_user in range(0 , nb_users-batch_size , batch_size):
    vk = train_set[id_user : id_user + batch_size]
    v0 = train_set[id_user : id_user + batch_size]
    ph0 , _ = rbm.sample_h(v0)
    for k in range(10):
      _ , hk = rbm.sample_h(vk)
      _ , vk = rbm.sample_v(hk)
      vk[v0<0] = v0[v0<0]
    phk , _ = rbm.sample_h(vk)
    rbm.train(v0,vk,ph0,phk)

    train_loss += torch.mean(torch.abs(v0[v0>=0] - vk[v0>=0]))
    s+=1.0
  print('epoch: '+str(epoch)+' loss: '+str(train_loss/s))




epoch: 1 loss: tensor(0.3121)
epoch: 2 loss: tensor(0.2240)
epoch: 3 loss: tensor(0.2363)
epoch: 4 loss: tensor(0.2431)
epoch: 5 loss: tensor(0.2484)
epoch: 6 loss: tensor(0.2476)
epoch: 7 loss: tensor(0.2511)
epoch: 8 loss: tensor(0.2456)
epoch: 9 loss: tensor(0.2496)
epoch: 10 loss: tensor(0.2458)
epoch: 11 loss: tensor(0.2521)
epoch: 12 loss: tensor(0.2548)
epoch: 13 loss: tensor(0.2548)
epoch: 14 loss: tensor(0.2569)
epoch: 15 loss: tensor(0.2568)


In [45]:
test_loss = 0
s = 0.
for id_user in range(nb_users):
    v = train_set[id_user:id_user+1]
    vt = test_set[id_user:id_user+1]
    if len(vt[vt>=0]) > 0:
        _,h = rbm.sample_h(v)
        _,v = rbm.sample_v(h)
        test_loss += torch.mean(torch.abs(vt[vt>=0] - v[vt>=0])) # Average Distance here
        s += 1.
print('test loss: '+str(test_loss/s))

test loss: tensor(0.2329)
