<a href="https://colab.research.google.com/github/temiafeye/Colab-Projects/blob/master/Recommender_System.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

This recommender system is built using a Restricted Boltzmann Machine (RBM).

The data set is obtained from https://grouplens.org/datasets/movielens/




In [0]:
#import libraries
import numpy as np 
import pandas as pd
import torch 
import torch.nn as nn 
import torch.nn.parallel 
import torch.optim as optim 
import torch.utils.data
from torch.autograd import Variable

In [0]:
# Load the three raw MovieLens 1M tables (movies, users, ratings), in that order.
# The files are read with a '::' separator, no header row, the python parsing
# engine and latin-1 decoding.
movies, users, ratings = (
    pd.read_csv(dat_path, sep = '::', header = None, engine = 'python', encoding ='latin-1')
    for dat_path in ('ml-1m/movies.dat', 'ml-1m/users.dat', 'ml-1m/ratings.dat')
)

In [0]:
#prepare training set and test set
# The MovieLens 100k split files are tab-separated and carry no header row, so
# header=None is required: with the default header inference pandas would turn
# the first rating of each file into column names and that record would be lost.
training_set = pd.read_csv('ml-100k/u1.base', delimiter = '\t', header = None)
#convert training set to an integer array
# (columns 0, 1, 2 are consumed downstream as user id, movie id, rating)
training_set = np.array(training_set, dtype = 'int')
test_set = pd.read_csv('ml-100k/u1.test', delimiter = '\t', header = None)
test_set = np.array(test_set, dtype = 'int')

In [0]:
# Number of users / movies = the largest id found in either split.
# Column 0 holds the user ids, column 1 the movie ids.
nb_users = int(max(training_set[:, 0].max(), test_set[:, 0].max()))
nb_movies = int(max(training_set[:, 1].max(), test_set[:, 1].max()))

In [0]:
#convert the data into an array with users in lines and movies in columns
#the RBM expects this data structure with observations in lines and features in columns
def convert(data, n_users = None, n_movies = None):
    """Turn a flat ratings table into one dense rating row per user.

    Parameters
    ----------
    data : 2-D integer numpy array
        Column 0 is the 1-based user id, column 1 the 1-based movie id and
        column 2 the rating. Any further columns are ignored.
    n_users, n_movies : int, optional
        Dimensions of the output grid. When omitted, the notebook-level
        globals ``nb_users`` / ``nb_movies`` are used, so existing
        ``convert(data)`` calls keep working.

    Returns
    -------
    list of list
        ``n_users`` rows, each with ``n_movies`` float entries. Entry
        ``[u - 1][m - 1]`` is the rating user ``u`` gave movie ``m``;
        0 means the user did not rate that movie.
    """
    if n_users is None:
        n_users = nb_users
    if n_movies is None:
        n_movies = nb_movies

    user_column = data[:, 0]
    new_data = []
    for id_user in range(1, n_users + 1):
        # build the row mask for this user once and reuse it for both columns
        rated_by_user = user_column == id_user
        id_movies = data[rated_by_user, 1]
        id_ratings = data[rated_by_user, 2]
        user_ratings = np.zeros(n_movies)
        # movie ids are 1-based, array positions are 0-based
        user_ratings[id_movies - 1] = id_ratings
        new_data.append(list(user_ratings))
    return new_data
training_set = convert(training_set)
test_set = convert(test_set)

#convert the data into Torch Tensors
training_set = torch.FloatTensor(training_set)
# the test tensor must be built from test_set (not training_set); otherwise
# the "test" data would just be a copy of the training ratings
test_set = torch.FloatTensor(test_set)


In [0]:
# Binarize the ratings in place, identically for both tensors:
#   0 (no rating) -> -1, 1 or 2 -> 0 (Not Liked), 3 and above -> 1 (Liked)
# The 0 -> -1 step has to run first, before any real rating is mapped to 0.
# Caution: this cell is not idempotent; running it a second time would remap
# the already-binarized values.
for rating_tensor in (training_set, test_set):
    rating_tensor[rating_tensor == 0] = -1
    rating_tensor[(rating_tensor == 1) | (rating_tensor == 2)] = 0
    rating_tensor[rating_tensor >= 3] = 1

In [0]:
#Creating the architecture of the neural network
#a probabilistic graphical model
class RBM():
    """Restricted Boltzmann Machine with Bernoulli-sampled visible and hidden units.

    Parameters are plain tensors that ``train`` updates in place:
        W : weights, shape (nh, nv)
        a : bias of the hidden nodes, shape (1, nh)
        b : bias of the visible nodes, shape (1, nv)
    """

    def __init__(self, nv, nh):
        """Initialise all parameters from a standard normal distribution.

        nv -- number of visible nodes
        nh -- number of hidden nodes
        """
        self.W = torch.randn(nh, nv)
        # biases are kept 2-D (1, n) so they can be expanded over a mini-batch
        self.a = torch.randn(1, nh)
        self.b = torch.randn(1, nv)

    def sample_h(self, x):
        """Sample the hidden nodes given a batch of visible states.

        x -- tensor of shape (batch, nv)
        Returns (p_h_given_v, h): the activation probabilities, shape
        (batch, nh), and a Bernoulli sample drawn from them.
        """
        wx = torch.mm(x, self.W.t())
        activation = wx + self.a.expand_as(wx)  # same bias for every row of the batch
        p_h_given_v = torch.sigmoid(activation)
        return p_h_given_v, torch.bernoulli(p_h_given_v)

    def sample_v(self, y):
        """Sample the visible nodes given a batch of hidden states.

        y -- tensor of shape (batch, nh)
        Returns (p_v_given_h, v): the activation probabilities, shape
        (batch, nv), and a Bernoulli sample drawn from them.
        """
        wy = torch.mm(y, self.W)
        activation = wy + self.b.expand_as(wy)  # same bias for every row of the batch
        p_v_given_h = torch.sigmoid(activation)
        return p_v_given_h, torch.bernoulli(p_v_given_h)

    def train(self, v0, vk, ph0, phK):
        """Apply one contrastive-divergence update to W, a and b in place.

        v0  -- visible batch at the start of the Gibbs chain, shape (batch, nv)
        vk  -- visible batch after the Gibbs steps, shape (batch, nv)
        ph0 -- hidden probabilities given v0, shape (batch, nh)
        phK -- hidden probabilities given vk, shape (batch, nh)
        """
        # v.t() @ ph has shape (nv, nh) while W is stored as (nh, nv),
        # so the difference is transposed before it is added
        self.W += (torch.mm(v0.t(), ph0) - torch.mm(vk.t(), phK)).t()
        self.b += torch.sum((v0 - vk), 0)
        self.a += torch.sum((ph0 - phK), 0)

In [0]:
#Create the RBM object
nh = 100 #number of hidden nodes; arbitrarily chosen, tunable
batch_size = 100 #number of users per mini-batch; also tunable
nv = training_set.size(1) #one visible node per column of the training tensor
rbm = RBM(nv, nh)

In [0]:
#training the RBM
#Choose number of epochs
nb_epoch = 10
for epoch in range(1, nb_epoch + 1):
    train_loss = 0 #accumulated loss over the batches of this epoch
    s = 0. #batch counter (float) used to normalise the accumulated loss
    # Step through every user. Stopping at nb_users - batch_size would skip
    # the trailing users, so the range runs to nb_users and the last batch
    # may simply be shorter than batch_size.
    for id_user in range(0, nb_users, batch_size):
        v0 = training_set[id_user:id_user+batch_size] #the real ratings, kept as the reference
        vk = v0 #the Gibbs chain starts from the real ratings
        #probabilities of the hidden nodes given the real ratings
        ph0,_ = rbm.sample_h(v0) #only the probabilities are needed, not the sample
        for k in range(10): #number of Gibbs sampling steps for contrastive divergence
            _,hk = rbm.sample_h(vk)
            _,vk = rbm.sample_v(hk)
            #freeze the visible nodes that were not originally rated (-1)
            vk[v0<0] = v0[v0<0]
        phk,_ = rbm.sample_h(vk)
        rbm.train(v0, vk, ph0, phk)
        #train loss: mean absolute difference, measured on rated entries only
        train_loss += torch.mean(torch.abs(v0[v0 >= 0] - vk[v0 >= 0]))
        #update the counter
        s += 1.
    #report the normalised loss once per epoch
    print('epoch: ' +str(epoch)+' loss: '+str(train_loss/s))
        