In [68]:
import numpy as np
import pandas as pd

In [69]:
# Data Preprocessing
training_frame = pd.read_csv("./ml-1m/training_set.csv").values
test_frame = pd.read_csv("./ml-1m/test_set.csv").values

training_set = {}
test_set = {}

nb_movies = max(max(training_frame[:,1]), max(test_frame[:,1]))

for i in training_frame:
    if i[0] not in training_set:
        training_set[i[0]] = np.zeros(nb_movies)
    training_set[i[0]][i[1] - 1] = i[2]
    
for i in test_frame:
    if i[0] not in test_set:
        test_set[i[0]] = np.zeros(nb_movies)
    test_set[i[0]][i[1] - 1] = i[2]

training_set = np.array(list(training_set.values()))
test_set = np.array(list(test_set.values()))


In [70]:
print(training_set.shape)
print(test_set.shape)

(6040, 3952)
(6040, 3952)


In [71]:
# make Data Suitable for RBM

training_set[training_set == 0] = -1
training_set[training_set == 1] = 0
training_set[training_set == 2] = 0
training_set[training_set >= 3] = 1

test_set[test_set == 0] = -1
test_set[test_set == 1] = 0
test_set[test_set == 2] = 0
test_set[test_set >= 3] = 1

In [74]:
# create my RBM class model
class RBM():
    def __init__(self, nv, nh):
        self.W = np.random.normal(size=(nh, nv))
        self.b = np.random.normal(size=(nv, 1))
        self.c = np.random.normal(size=(nh, 1))
        self.batch_size = 1
        self.rate = 1
        self.k = 1
    def sigmoid(self, X):
       return 1.0/ (1.0 + np.exp(-X))
    def sample_h(self, x):
        # x is in form of t * nv
        activation = np.dot(self.W, np.transpose(x)) + self.c 
        # p_h_given_v is in form of nh * t, so we transposed it
        p_h_given_v = np.transpose(self.sigmoid(activation))        
        return p_h_given_v, np.random.binomial(n = 1, p = p_h_given_v) 
    def sample_v(self, y):
        # y is in form of t * nh
        activation = np.dot(self.W.transpose(), np.transpose(y)) + self.b 
        # p_v_given_h is in form of nv * t, so we transposed it
        p_v_given_h = np.transpose(self.sigmoid(activation))        
        return p_v_given_h, np.random.binomial(n = 1, p = p_v_given_h)
    def train(self, v0, vk, ph0, phk):
        # v0, vk is in form of t * nv
        # ph0, phk is in form of t * nh
        self.W += self.rate * (np.dot(ph0.transpose(), v0) - np.dot(phk.transpose(), vk)) / self.batch_size
        self.b += self.rate * (np.dot((v0 - vk).transpose(), np.ones((self.batch_size, 1)))) / self.batch_size
        self.c += self.rate * (np.dot((ph0 - phk).transpose(), np.ones((self.batch_size, 1)))) / self.batch_size
    def compile(self, rate = 1, k = 2):
        self.rate = rate
        self.k = k
    def loss(self, x):
        # v0 is in form of t * nv
        vk = x
        for k in range(self.k):
            _, h = self.sample_h(vk)
            _, vk = self.sample_v(h)
            # freeze the -1 numbers
            vk[x < 0] = x[x < 0]
        loss = 0.0
        diff = (vk - x)
        for i in diff:
            loss += (np.dot(i , i.transpose())) / len(i)
        loss /= len(x)
        return loss
    def fit(self, X, batch_size, epochs):
        # X is in form of n sample * m feature
        self.batch_size = batch_size
        self.epochs = epochs
        for i in range(epochs):
            print("Start Epoch : ", i)
            np.random.shuffle(X)
            s = 0
            loss = 0.0
            m = int((len(X) / self.batch_size) / 50)
            for j in range(0, len(X) - self.batch_size, self.batch_size):
                v0 = X[j:j + batch_size]
                vk = v0
                for k in range(self.k):
                    _, h = self.sample_h(vk)
                    _, vk = self.sample_v(h)
                    # freeze the -1 numbers
                    vk[v0 < 0] = v0[v0 < 0]
                ph0,_ = self.sample_h(v0)
                phk,_ = self.sample_h(vk)
                self.train(v0, vk, ph0, phk)
                loss += self.loss(v0)
                
                s += 1
                if(s % m == 0):
                    print("#",end='')
            print("\nloss : ", loss / s) 
    def predict(self, X):
        # X is in form of n sample * m feature
        for i in range(len(X)):
            _, h = self.sample_h(X[i])
            _, X[i] = self.sample_v(h)
        return X

In [75]:
# create model and train it

rbm = RBM(len(training_set[0]), 100)
rbm.compile(rate = 1, k = 5)
rbm.fit(training_set, batch_size = 32, epochs = 10)

Start Epoch :  0
##############################################################
loss :  0.007811616727862005
Start Epoch :  1
##############################################################
loss :  0.007141928204948744
Start Epoch :  2
##############################################################
loss :  0.007038921849427168
Start Epoch :  3
##############################################################
loss :  0.007024032404815227
Start Epoch :  4
##############################################################
loss :  0.006968260078387458
Start Epoch :  5
##############################################################
loss :  0.006946346517249546
Start Epoch :  6
##############################################################
loss :  0.006948028940369543
Start Epoch :  7
##############################################################
loss :  0.006987103217331383
Start Epoch :  8
##############################################################
loss :  0.00695538954151951
Start Epoch :  9
###

In [76]:
# predict our model
y = rbm.predict(test_set)
loss = np.mean(np.abs(training_set[training_set >= 0] - y[training_set >= 0]))
loss

TypeError: only integer scalar arrays can be converted to a scalar index

In [61]:
a = training_set[2]
a = a[a >= 0]

b = test_set[2]
b = b[b >= 0]

c = y[2]
c = c[c >= 0]

In [62]:
a

array([1., 0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 0., 1.,
       1., 1., 0., 1., 1., 1., 1., 1., 1., 1., 1., 0., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 0., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 0., 1.,
       1., 1., 1., 0., 1., 1., 1., 0., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 0.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 0., 1., 1., 1., 1., 0., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1.

In [63]:
b

array([1., 1., 1., 1., 1., 1., 1., 1., 0., 1., 0., 1., 0.])

In [64]:
c

array([1., 1., 1., 1., 1., 1., 1., 1., 0., 1., 0., 1., 0.])

In [65]:
for i in c:
    i += 1
c

array([1., 1., 1., 1., 1., 1., 1., 1., 0., 1., 0., 1., 0.])