## Boltzmann Machines

#### Importing the libraries

In [1]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.optim as optim
import torch.utils.data
from torch.autograd import Variable

#### Importing the dataset

In [2]:
movies = pd.read_csv('ml-1m/movies.dat', sep = '::', header = None, engine = 'python', encoding = 'latin-1')
users = pd.read_csv('ml-1m/users.dat', sep = '::', header = None, engine = 'python', encoding = 'latin-1')
ratings = pd.read_csv('ml-1m/ratings.dat', sep = '::', header = None, engine = 'python', encoding = 'latin-1')

#### Preparing the training set and the test set

In [3]:
training_set = pd.read_csv('ml-100k/u1.base', delimiter = '\t')
training_set = np.array(training_set, dtype = 'int')
test_set = pd.read_csv('ml-100k/u1.test', delimiter = '\t')
test_set = np.array(test_set, dtype = 'int')

#### Getting the number of users and movies

In [4]:
nb_users = int(max(max(training_set[:,0]), max(test_set[:,0])))
nb_movies = int(max(max(training_set[:,1]), max(test_set[:,1])))

In [5]:
nb_users

943

In [6]:
nb_movies

1682

#### Converting the data into an array with users in lines and movies in columns

In [7]:
def convert(data):
    new_data = []
    for id_users in range(1, nb_users + 1):
        id_movies = data[:,1][data[:,0] == id_users]
        id_ratings = data[:,2][data[:,0] == id_users]
        ratings = np.zeros(nb_movies)
        ratings[id_movies - 1] = id_ratings
        new_data.append(list(ratings))
    return new_data
training_set = convert(training_set)
test_set = convert(test_set)


#### Converting the data into Torch tensors

Lines= observation into the network, features = columns the input nodes

Architecture is created with tensors = arrays that contains elements of a single data type, tensor is multidimensional matrix that is a pytorch array  

In [10]:
training_set = torch.FloatTensor(training_set)
test_set = torch.FloatTensor(test_set)

#### Converting the ratings into binary ratings 1 (Liked) or 0 (Not Liked)

In [11]:
training_set[training_set == 0] = -1
training_set[training_set == 1] = 0
training_set[training_set == 2] = 0
training_set[training_set >= 3] = 1
test_set[test_set == 0] = -1
test_set[test_set == 1] = 0
test_set[test_set == 2] = 0
test_set[test_set >= 3] = 1

In [12]:
training_set

tensor([[-1.,  1.,  1.,  ..., -1., -1., -1.],
        [ 1., -1., -1.,  ..., -1., -1., -1.],
        [-1., -1., -1.,  ..., -1., -1., -1.],
        ...,
        [ 1., -1., -1.,  ..., -1., -1., -1.],
        [-1., -1., -1.,  ..., -1., -1., -1.],
        [-1.,  1., -1.,  ..., -1., -1., -1.]])

#### Creating the architecture of the Neural Network

Boltzmann Machine is a probabilistic graphical model

BM usigin RBM ( Restricted Boltzmann Machine Architecture)

##### 1 Function inside the class:

init > Number of hidden nodes , the weights the probability of the visible nodes given the hidden nodes, bias for the same probability and also the bias for the visibles nodes given the hidden nodes
nh: Hidden nodes, nv: Visible nodes, W= weight.

self.W=torch.randn = initialize the function randomly(Normal Distribution)

self.a=torch.randn initializethe function randomly(Normal Distribution)- a= bias, vector of nh element, (1,nh) two dimension 1 bias 2 vector, (pytorch tensor)cannot accept 1 dimension.

self.b=torch.randn initializethe function randomly(Normal Distribution)- a= bias, vector of nv element, (1,nv) two dimension 1 bias 2 vector, (pytorch tensor)cannot accept 1 dimension.- For the visible nodes

##### 2 Function (Sample): ( the hidden nodes)

probabilities of the hidden nodes given visibles nodes, sigmoid activation function, is used cuz during the training we aproximate the loglikelyhood gradient, using gip sampling

1) Probability of H given v: sigmoid activation function, apply to wx = torch.mm(x, self.W.t())

2) Inside the activation function  wx plus the bias is a linear function of the neuron.activation = wx + self.a.expand_as(wx), batches= expand_as(wx) the bias is apply to each line of the mini batch.

3) Activate the hidden nodes: probability will be activate  (p_h_given_v > H of given V)  = torch.sigmoid(activation)

4) retun probability and the sample: bernoulli rvn cuz we are predicting and binary outcome, whether the user like or not (0 or 1) return p_h_given_v, torch.bernoulli(p_h_given_v), probability the hidden is activated


##### 3 Funtion (Sample V):  The visible nodes

the probabilities of the visibles nodes given the hidden nodes P=1. Outcome the predicting rating.
same data from sampleh

##### 4 Function Train - Contrastive divergent - aprox lilkelyhood grad

Energy based model, we need to minimize the weight, the goal is to maximize the log-likelyhood
we need to compute the gradient (aprox), usig Gibbs sampling algo =  K-steps constrative divergent

In [14]:
class RBM():
    def __init__(self, nv, nh):
        self.W = torch.randn(nh, nv)
        self.a = torch.randn(1, nh)
        self.b = torch.randn(1, nv)
    def sample_h(self, x):
        wx = torch.mm(x, self.W.t())
        activation = wx + self.a.expand_as(wx)
        p_h_given_v = torch.sigmoid(activation)
        return p_h_given_v, torch.bernoulli(p_h_given_v)
    def sample_v(self, y):
        wy = torch.mm(y, self.W)
        activation = wy + self.b.expand_as(wy)
        p_v_given_h = torch.sigmoid(activation)
        return p_v_given_h, torch.bernoulli(p_v_given_h)
    def train(self, v0, vk, ph0, phk):
        self.W += torch.mm(v0.t(), ph0) - torch.mm(vk.t(), phk)
        self.b += torch.sum((v0 - vk), 0)
        self.a += torch.sum((ph0 - phk), 0)

In [15]:
nv = len(training_set[0])
nh = 100 #number of features
batch_size = 100 # Based on number of observations
rbm = RBM(nv, nh) # creatinf RBM object 

#### Training the RBM

In [None]:
nb_epoch = 10
for epoch in range(1, nb_epoch + 1):
    train_loss = 0
    s = 0.
    for id_user in range(0, nb_users - batch_size, batch_size):
        vk = training_set[id_user:id_user+batch_size]
        v0 = training_set[id_user:id_user+batch_size]
        ph0,_ = rbm.sample_h(v0)
        for k in range(10):
            _,hk = rbm.sample_h(vk)
            _,vk = rbm.sample_v(hk)
            vk[v0<0] = v0[v0<0]
        phk,_ = rbm.sample_h(vk)
        rbm.train(v0, vk, ph0, phk)
        train_loss += torch.mean(torch.abs(v0[v0>=0] - vk[v0>=0]))
        s += 1.
    print('epoch: '+str(epoch)+' loss: '+str(train_loss/s))


#### Testing the RBM

In [None]:
test_loss = 0
s = 0.
for id_user in range(nb_users):
    v = training_set[id_user:id_user+1]
    vt = test_set[id_user:id_user+1]
    if len(vt[vt>=0]) > 0:
        _,h = rbm.sample_h(v)
        _,v = rbm.sample_v(h)
        test_loss += torch.mean(torch.abs(vt[vt>=0] - v[vt>=0]))
        s += 1.
print('test loss: '+str(test_loss/s))