### Packages Required

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.nn.parallel
import torch.optim as optim
import torch.utils.data
from torch.autograd import Variable

### Datasets

In [2]:
path = 'C:/Users/Nithin/Downloads/Movie Recommendation_AI_Boltzman/'
movies = pd.read_csv(path + '/Datasets/movies.dat', sep = '::', header = None, engine = 'python', encoding = 'latin-1')
users = pd.read_csv(path + '/Datasets/users.dat', sep = '::', header = None, engine = 'python', encoding = 'latin-1')
ratings = pd.read_csv(path + '/Datasets/ratings.dat', sep = '::', header = None, engine = 'python', encoding = 'latin-1')

In [4]:
training_set = pd.read_csv(path + 'Datasets/train.base', delimiter = '\t')
training_set = np.array(training_set, dtype = 'int')
test_set = pd.read_csv(path + 'Datasets/test.test', delimiter = '\t')
test_set = np.array(test_set, dtype = 'int')

In [10]:
print(training_set)
print('------------------')
print(test_set)

[[        1         2         3 876893171]
 [        1         3         4 878542960]
 [        1         4         3 876893119]
 ...
 [      943      1188         3 888640250]
 [      943      1228         3 888640275]
 [      943      1330         3 888692465]]
------------------
[[        1        10         3 875693118]
 [        1        12         5 878542960]
 [        1        14         5 874965706]
 ...
 [      459       934         3 879563639]
 [      460        10         3 882912371]
 [      462       682         5 886365231]]


In [11]:
nb_users = int(max(max(training_set[:,0]), max(test_set[:,0])))
nb_movies = int(max(max(training_set[:,1]), max(test_set[:,1])))

print('user count :' ,nb_users)

print('Movie count :' ,nb_movies)

user count : 943
Movie count : 1682


In [12]:
def convert(data):
    new_data = []
    for id_users in range(1, nb_users + 1):
        id_movies = data[:,1][data[:,0] == id_users]
        id_ratings = data[:,2][data[:,0] == id_users]
        ratings = np.zeros(nb_movies)
        ratings[id_movies - 1] = id_ratings
        new_data.append(list(ratings))
    return new_data
training_set = convert(training_set)
test_set = convert(test_set)

print(pd.DataFrame(training_set))

print(pd.DataFrame(test_set))

     0     1     2     3     4     5     6     7     8     9     ...  1672  \
0     0.0   3.0   4.0   3.0   3.0   0.0   4.0   1.0   5.0   0.0  ...   0.0   
1     4.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   2.0  ...   0.0   
2     0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0  ...   0.0   
3     0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0  ...   0.0   
4     0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0  ...   0.0   
..    ...   ...   ...   ...   ...   ...   ...   ...   ...   ...  ...   ...   
938   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   5.0   0.0  ...   0.0   
939   0.0   0.0   0.0   2.0   0.0   0.0   4.0   5.0   3.0   0.0  ...   0.0   
940   5.0   0.0   0.0   0.0   0.0   0.0   4.0   0.0   0.0   0.0  ...   0.0   
941   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0  ...   0.0   
942   0.0   5.0   0.0   0.0   0.0   0.0   0.0   0.0   3.0   0.0  ...   0.0   

     1673  1674  1675  1676  1677  1678  1679  1680  1681  
0  

In [13]:
training_set = torch.FloatTensor(training_set)
test_set = torch.FloatTensor(test_set)

print(training_set)

print(test_set)

tensor([[0., 3., 4.,  ..., 0., 0., 0.],
        [4., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [5., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 5., 0.,  ..., 0., 0., 0.]])
tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]])


In [14]:
training_set[training_set == 0] = -1
training_set[training_set == 1] = 0
training_set[training_set == 2] = 0
#training_set[training_set == 3] = 0
training_set[training_set >= 3] = 1
test_set[test_set == 0] = -1
test_set[test_set == 1] = 0
test_set[test_set == 2] = 0
#test_set[test_set == 3] = 0
test_set[test_set >= 3] = 1

In [15]:
training_set

tensor([[-1.,  1.,  1.,  ..., -1., -1., -1.],
        [ 1., -1., -1.,  ..., -1., -1., -1.],
        [-1., -1., -1.,  ..., -1., -1., -1.],
        ...,
        [ 1., -1., -1.,  ..., -1., -1., -1.],
        [-1., -1., -1.,  ..., -1., -1., -1.],
        [-1.,  1., -1.,  ..., -1., -1., -1.]])

In [16]:
test_set

tensor([[-1., -1., -1.,  ..., -1., -1., -1.],
        [-1., -1., -1.,  ..., -1., -1., -1.],
        [-1., -1., -1.,  ..., -1., -1., -1.],
        ...,
        [-1., -1., -1.,  ..., -1., -1., -1.],
        [-1., -1., -1.,  ..., -1., -1., -1.],
        [-1., -1., -1.,  ..., -1., -1., -1.]])

In [17]:
class RBM():
    def __init__(self, nv, nh):
        self.W = torch.randn(nh, nv)
        self.a = torch.randn(1, nh)
        self.b = torch.randn(1, nv)
    def sample_h(self, x):
        wx = torch.mm(x, self.W.t())
        activation = wx + self.a.expand_as(wx)
        p_h_given_v = torch.sigmoid(activation)
        return p_h_given_v, torch.bernoulli(p_h_given_v)
    def sample_v(self, y):
        wy = torch.mm(y, self.W)
        activation = wy + self.b.expand_as(wy)
        p_v_given_h = torch.sigmoid(activation)
        return p_v_given_h, torch.bernoulli(p_v_given_h)
    def train(self, v0, vk, ph0, phk):
        #self.W += torch.mm(v0.t(), ph0) - torch.mm(vk.t(), phk)
        self.W += (torch.mm(v0.t(), ph0) - torch.mm(vk.t(), phk)).t()
        self.b += torch.sum((v0 - vk), 0)
        self.a += torch.sum((ph0 - phk), 0)
nv = len(training_set[0])
nh = 100
batch_size = 100
rbm = RBM(nv, nh)

In [18]:
nb_epoch = 20
for epoch in range(1, nb_epoch + 1):
    train_loss = 0
    s = 0.
    for id_user in range(0, nb_users - batch_size, batch_size):
        vk = training_set[id_user:id_user+batch_size]
        v0 = training_set[id_user:id_user+batch_size]
        ph0,_ = rbm.sample_h(v0)
        for k in range(10):
            _,hk = rbm.sample_h(vk)
            _,vk = rbm.sample_v(hk)
            vk[v0<0] = v0[v0<0]
        phk,_ = rbm.sample_h(vk)
        rbm.train(v0, vk, ph0, phk)
        
        train_loss += torch.mean(torch.abs(v0[v0>=0] - vk[v0>=0]))
        s += 1.
    print('epoch: '+str(epoch)+' loss: '+str(train_loss/s))

epoch: 1 loss: tensor(0.3424)
epoch: 2 loss: tensor(0.2500)
epoch: 3 loss: tensor(0.2521)
epoch: 4 loss: tensor(0.2470)
epoch: 5 loss: tensor(0.2487)
epoch: 6 loss: tensor(0.2481)
epoch: 7 loss: tensor(0.2479)
epoch: 8 loss: tensor(0.2483)
epoch: 9 loss: tensor(0.2499)
epoch: 10 loss: tensor(0.2468)
epoch: 11 loss: tensor(0.2464)
epoch: 12 loss: tensor(0.2445)
epoch: 13 loss: tensor(0.2499)
epoch: 14 loss: tensor(0.2485)
epoch: 15 loss: tensor(0.2446)
epoch: 16 loss: tensor(0.2470)
epoch: 17 loss: tensor(0.2491)
epoch: 18 loss: tensor(0.2494)
epoch: 19 loss: tensor(0.2474)
epoch: 20 loss: tensor(0.2477)


In [19]:
test_loss = 0
s = 0.
for id_user in range(nb_users):
    v = training_set[id_user:id_user+1]
    vt = test_set[id_user:id_user+1]
    if len(vt[vt>=0]) > 0:
        _,h = rbm.sample_h(v)
        _,v = rbm.sample_v(h)
        test_loss += torch.mean(torch.abs(vt[vt>=0] - v[vt>=0]))
        s += 1.
print('test loss: '+str(test_loss/s))

test loss: tensor(0.2413)


In [20]:
def movie_recos(id_user):
    input = training_set[id_user:id_user+1]
    target = test_set[id_user:id_user+1]
    if len(target[target>=0]) > 0:
        _,h = rbm.sample_h(input)
        _,output = rbm.sample_v(h)
        movie_ratings = pd.DataFrame(output.cpu().detach().numpy()).T
        movie_ratings.columns = ['Pred_Ratings']
        target_ratings = pd.DataFrame(target.cpu().detach().numpy()).T
        target_ratings.columns = ['Actual_Ratings']
        Final_Movie_recos = pd.concat([movie_ratings, target_ratings], axis=1)
        Final_Movie_recos['movie_id'] = Final_Movie_recos.index
        Final_Movie_recos = Final_Movie_recos[Final_Movie_recos.Actual_Ratings==-1]
        Final_Movie_recos = Final_Movie_recos[Final_Movie_recos.Pred_Ratings==1]
        return Final_Movie_recos

In [21]:
Final_Movie_recos = movie_recos(5)

In [22]:
Final_Movie_recos

Unnamed: 0,Pred_Ratings,Actual_Ratings,movie_id
0,1.0,-1.0,0
3,1.0,-1.0,3
4,1.0,-1.0,4
6,1.0,-1.0,6
7,1.0,-1.0,7
...,...,...,...
1673,1.0,-1.0,1673
1674,1.0,-1.0,1674
1676,1.0,-1.0,1676
1678,1.0,-1.0,1678


In [23]:
movies.columns = ['movie_id', 'movie_name', 'type']

movies.head()

Unnamed: 0,movie_id,movie_name,type
0,1,Toy Story (1995),Animation|Children's|Comedy
1,2,Jumanji (1995),Adventure|Children's|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama
4,5,Father of the Bride Part II (1995),Comedy


In [24]:
pd.merge(Final_Movie_recos, movies, on='movie_id', how='inner')

Unnamed: 0,Pred_Ratings,Actual_Ratings,movie_id,movie_name,type
0,1.0,-1.0,3,Grumpier Old Men (1995),Comedy|Romance
1,1.0,-1.0,4,Waiting to Exhale (1995),Comedy|Drama
2,1.0,-1.0,6,Heat (1995),Action|Crime|Thriller
3,1.0,-1.0,7,Sabrina (1995),Comedy|Romance
4,1.0,-1.0,8,Tom and Huck (1995),Adventure|Children's
...,...,...,...,...,...
1012,1.0,-1.0,1673,Boogie Nights (1997),Drama
1013,1.0,-1.0,1674,Witness (1985),Drama|Romance|Thriller
1014,1.0,-1.0,1676,Starship Troopers (1997),Action|Adventure|Sci-Fi|War
1015,1.0,-1.0,1678,"Joy Luck Club, The (1993)",Drama
