<a href="https://colab.research.google.com/github/omidkhalafbeigi/rbm/blob/main/RBM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
import pandas as pd
import numpy as np
from google.colab import drive
from tensorflow.keras import datasets
from sklearn.preprocessing import MinMaxScaler
from matplotlib import pyplot as plt

# Mount Google Drive at /content/drive (Colab-specific API).
# NOTE(review): the dataset loader reads from the *relative* path
# 'drive/MyDrive/RS_Dataset', so it only finds the mounted files when the
# working directory is /content — presumably the Colab default; confirm.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
def load_rs_dataset(data_dir='drive/MyDrive/RS_Dataset'):
  """Load the u1 rating split and build a binarised user-by-movie tensor.

  Reads the tab-separated files `u1.base` and `u1.test` from `data_dir`,
  dropping the last column of each. The remaining columns are used as
  (user id, movie id, rating), with 1-based ids.

  Args:
    data_dir: Directory containing `u1.base` and `u1.test`. The default is
      relative to the current working directory.

  Returns:
    A tuple `(original_set, training_set)`:
      original_set: numpy array with one (user id, movie id, rating) row per
        line of `u1.base`.
      training_set: float32 tensor of shape (users, movies) where
        -1 marks a movie the user did not rate, 0 a rating of 1-3 and
        1 a rating above 3. The movie dimension covers the largest movie id
        found in either file.
  """
  def read_ratings(file_name):
    # Every column except the last one is kept.
    frame = pd.read_csv(f'{data_dir}/{file_name}', sep='\t', engine='python', header=None)
    return np.array(frame.iloc[:, :-1])

  original_set = read_ratings('u1.base')
  test = read_ratings('u1.test')

  # Size by the largest id rather than the number of distinct ids, so that a
  # gap in the id sequence cannot push an index out of bounds.
  users_count = int(original_set[:, 0].max())
  movies_count = int(max(original_set[:, 1].max(), test[:, 1].max()))

  user_idx = torch.as_tensor(original_set[:, 0] - 1, dtype=torch.long)
  movie_idx = torch.as_tensor(original_set[:, 1] - 1, dtype=torch.long)
  rating_values = torch.as_tensor(original_set[:, 2].astype(np.float32))

  # Must start from zeros: torch.empty returns uninitialised memory, which
  # would make "unrated" cells indistinguishable from arbitrary garbage.
  ratings = torch.zeros(users_count, movies_count, dtype=torch.float32)
  ratings[user_idx, movie_idx] = rating_values

  # All masks are computed on the untouched `ratings` tensor, so the freshly
  # written 0 labels are never confused with the "unrated" zeros.
  training_set = ratings.clone()
  training_set[ratings == 0] = -1
  training_set[(ratings >= 1) & (ratings <= 3)] = 0
  training_set[ratings > 3] = 1

  return original_set, training_set

In [3]:
def train(training_set, hidden_nodes, epochs, batch_size, k_steps, learning_rate):
  """Train a restricted Boltzmann machine with k-step contrastive divergence.

  Args:
    training_set: 2-D float tensor, one row per sample and one column per
      visible node.
    hidden_nodes: Number of hidden units.
    epochs: Number of full passes over `training_set`.
    batch_size: Number of rows per mini-batch (must be >= 1). The last batch
      of an epoch may be smaller when the row count is not a multiple.
    k_steps: Number of Gibbs sampling steps per mini-batch.
    learning_rate: Step size applied to the (summed, not averaged) batch
      statistics.

  Returns:
    Dict with the learned parameters: 'w' (visible x hidden weights),
    'b' (1 x visible bias) and 'c' (1 x hidden bias).

  Raises:
    ValueError: If `batch_size` is smaller than 1.
  """
  if batch_size < 1:
    raise ValueError(f'batch_size must be a positive integer, got {batch_size}')

  samples_count, visible_nodes = training_set.shape
  w = torch.randn(visible_nodes, hidden_nodes, dtype=torch.float32)
  c = torch.randn(1, hidden_nodes, dtype=torch.float32) # Bias for hidden nodes
  b = torch.randn(1, visible_nodes, dtype=torch.float32) # Bias for visible nodes

  # NOTE(review): values outside [0, 1] in `training_set` (e.g. a -1 marker)
  # are fed to the model as ordinary visible values; no masking happens here.
  for epoch in range(epochs):
    # Step by batch *start* so that every row is visited, including the
    # trailing partial batch and data sets smaller than one batch.
    for start in range(0, samples_count, batch_size):
      v0 = training_set[start:start + batch_size]
      vk = v0
      h0 = torch.sigmoid(torch.matmul(v0, w) + c)

      # Gibbs chain: alternately sample hidden and visible states.
      for _ in range(k_steps):
        hk = torch.bernoulli(torch.sigmoid(torch.matmul(vk, w) + c))
        vk = torch.bernoulli(torch.sigmoid(torch.matmul(hk, w.t()) + b))

      # Hidden probabilities (not samples) for the end of the chain.
      hk = torch.sigmoid(torch.matmul(vk, w) + c)

      # Data statistics minus chain statistics.
      w += learning_rate * (torch.matmul(v0.t(), h0) - torch.matmul(vk.t(), hk))
      b += learning_rate * torch.sum(v0 - vk, axis=0) # Sum over all rows
      c += learning_rate * torch.sum(h0 - hk, axis=0)

    print(f'Epoch: {epoch + 1}')

  return {'w': w, 'b': b, 'c': c}

In [4]:
def predict(test, w, b, c, k_steps):
  """Reconstruct visible-unit probabilities with a deterministic chain.

  Starting from `test`, alternates hidden and visible sigmoid activations
  `k_steps` times, propagating probabilities instead of sampled states.

  Args:
    test: 2-D float tensor, one row per sample and one column per visible node.
    w: Weight tensor of shape (visible, hidden).
    b: Visible bias, broadcastable to (rows, visible).
    c: Hidden bias, broadcastable to (rows, hidden).
    k_steps: Number of visible -> hidden -> visible passes (must be >= 1).

  Returns:
    Tensor shaped like `test` holding the visible probabilities after the
    final pass.

  Raises:
    ValueError: If `k_steps` is smaller than 1, since no reconstruction
      would be produced.
  """
  if k_steps < 1:
    raise ValueError(f'k_steps must be at least 1, got {k_steps}')

  vk = test
  for _ in range(k_steps):
    hk = torch.sigmoid(torch.matmul(vk, w) + c)
    vk = torch.sigmoid(torch.matmul(hk, w.t()) + b)

  return vk

In [5]:
def get_accuracy(predicted_array, true_array, original_set, threshold=0.5):
  """Fraction of listed (user, movie) pairs whose binarised prediction matches.

  Predictions are binarised deterministically (`>= threshold`) instead of
  being sampled, so repeated calls on the same inputs give the same score.

  Args:
    predicted_array: 2-D tensor of predicted values, indexed [user, movie].
    true_array: 2-D tensor of reference labels with the same indexing.
      A pair whose reference value is neither 0 nor 1 counts as a mismatch.
    original_set: Array-like of rows whose first two columns are the 1-based
      user id and movie id to evaluate; further columns are ignored.
    threshold: Predictions at or above this value are treated as label 1.

  Returns:
    Accuracy as a Python float in [0, 1].

  Raises:
    ValueError: If `original_set` contains no rows.
  """
  entries = np.asarray(original_set)
  if entries.size == 0:
    raise ValueError('original_set must contain at least one rating')

  # Convert the 1-based ids to 0-based tensor indices in one shot.
  user_idx = torch.as_tensor(entries[:, 0] - 1, dtype=torch.long)
  movie_idx = torch.as_tensor(entries[:, 1] - 1, dtype=torch.long)

  predicted_labels = (predicted_array[user_idx, movie_idx] >= threshold).to(true_array.dtype)
  matches = predicted_labels == true_array[user_idx, movie_idx]

  # Integer count / count keeps full float64 precision for the ratio.
  return int(matches.sum()) / matches.numel()

In [6]:
# Load the ratings: `original_set` holds the raw (user id, movie id, rating)
# rows and `training_set` the user-by-movie tensor built from them.
original_set, training_set = load_rs_dataset()

In [None]:
# Fit the RBM; `k_steps` stays a module-level name so later cells can reuse
# the same chain length.
k_steps = 10
model = train(
    training_set,
    hidden_nodes=200,
    epochs=100,
    batch_size=256,
    k_steps=k_steps,
    learning_rate=0.01,
)

# Unpack the learned parameters: weights, visible bias, hidden bias.
w = model['w']
b = model['b']
c = model['c']