In [11]:
import torch
import math
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, TensorDataset
import pandas as pd
from sklearn.model_selection import train_test_split
import numpy as np
from torch.utils.data import DataLoader

In [12]:
file = "../ml-100k/u.data"

df = pd.read_csv(file, sep='\t', header=None)
df.columns = "uid iid rating timestamp".split()
df.drop('timestamp', axis=1, inplace=True)
print(df.head())


   uid  iid  rating
0  196  242       3
1  186  302       3
2   22  377       1
3  244   51       2
4  166  346       1


In [13]:
n_users = df['uid'].nunique()
n_items = df['iid'].nunique()
print("num users {}, num items {}".format(n_users, n_items))

num users 943, num items 1682


In [14]:
X = df.iloc[:, :-1].values
target = df.iloc[:, -1].values

print(type(X))
print(type(target))
print(X[:10])
print(target[:10])
print(X[0][0], X[0][1], target[0])


<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
[[196 242]
 [186 302]
 [ 22 377]
 [244  51]
 [166 346]
 [298 474]
 [115 265]
 [253 465]
 [305 451]
 [  6  86]]
[3 3 1 2 1 4 2 5 3 3]
196 242 3


In [15]:
X_train, X_test, y_train, y_test = train_test_split(X, target, test_size=0.20, random_state=42)
print(X_train.shape)
print(y_train.shape)
print(X_test.shape)
print(y_test.shape)


(80000, 2)
(80000,)
(20000, 2)
(20000,)


In [16]:
print(X_train[:10])
print(X_train[:, 0])
print(X_train[:, 1])

[[ 807 1411]
 [ 474  659]
 [ 463  268]
 [ 139  286]
 [ 621  751]
 [ 264  137]
 [ 262  219]
 [   7  300]
 [ 653  393]
 [ 235  198]]
[807 474 463 ... 437 284 222]
[1411  659  268 ...  475  322  200]


In [17]:
print(y_train)
rating = torch.from_numpy(y_train).view(-1, 1)
print(rating)


[1 5 4 ... 3 3 3]
tensor([[1],
        [5],
        [4],
        ...,
        [3],
        [3],
        [3]])


In [18]:
class MovieLens(Dataset):

    def __init__(self, X, y):
        self.users = torch.from_numpy(X[:, 0]).type(torch.LongTensor)
        self.items = torch.from_numpy(X[:, 1]).type(torch.LongTensor)
        self.ratings = torch.from_numpy(y).view(-1, 1).type(torch.FloatTensor)
        self.n_samples = self.users.shape[0]

    def __getitem__(self, index):
        return self.users[index], self.items[index], self.ratings[index]

    def __len__(self):
        return self.n_samples

In [19]:
bs = 256

train_dataset = MovieLens(X_train, y_train)
train_dataloader = DataLoader(dataset=train_dataset, batch_size=bs, shuffle=True, num_workers=2)

In [20]:
test_dataset = MovieLens(X_test, y_test)
test_dataloader = DataLoader(dataset=test_dataset, batch_size=bs, shuffle=False, num_workers=2)

In [21]:
train_dataiter = iter(train_dataloader)
data = train_dataiter.next()
users, items, ratings = data
#print(users)
#print(items)
#print(ratings)

In [22]:
test_dataiter = iter(test_dataloader)
data = test_dataiter.next()
users, items, ratings = data
#print(users)
#print(items)
#print(ratings)



In [23]:
embed_dim= 50
bias_dim = 1
ratings_range = (0, 5.5)

In [24]:
class NCF(nn.Module):
    def __init__(self, users_size, items_size, embed_dim ,bias_dim, ratings_range):
        super(NCF, self).__init__()
        self.embed_dim = embed_dim
        self.ratings_range = ratings_range
        self.embedding_user = nn.Embedding(users_size+1, embed_dim)
        self.embedding_item = nn.Embedding(items_size+1, embed_dim)
        self.bias_user = nn.Embedding(users_size+1, bias_dim)
        self.bias_item = nn.Embedding(items_size+1, bias_dim)
        self.sigmoid = nn.Sigmoid()

    def forward(self, users, items):
        u_emb = self.embedding_user(users)
        i_emb = self.embedding_item(items)
        u_bias = self.bias_user(users)
        i_bias = self.bias_item(items)

        dot = torch.bmm(u_emb.view(-1, 1, self.embed_dim), i_emb.view(-1, self.embed_dim, 1))
        dot = dot.squeeze()

        res = dot + u_bias.squeeze() + i_bias.squeeze()
        pred = self.sigmoid(res) * (self.ratings_range[1]-self.ratings_range[0] + self.ratings_range[0])

        return pred

In [26]:
print(ratings_range[1]-ratings_range[0] + ratings_range[0])



5.5


In [27]:
model = NCF(n_users, n_items, embed_dim, bias_dim, ratings_range)

In [28]:
learning_rate = 0.001
criterion = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=learning_rate)

In [29]:
num_epochs = 4
total_samples = len(train_dataset)
n_iterations = math.ceil(total_samples / bs)
print(total_samples, n_iterations)

for epoch in range(num_epochs):
    for i, (users, items, ratings) in enumerate(train_dataloader):
        loss_step = 0.0
        rating_preds = model(users, items)
        #print(rating_preds.type())
        #print(ratings.type())
        #print()
        #print()
        optimizer.zero_grad()
        loss = criterion(rating_preds, ratings)
        loss.backward()
        optimizer.step()

        loss_step += loss.item()

        if (i+1) % 100 == 0:
            print("epoch {}/{}, step {}/{}, loss {}".format(
                        epoch+1, num_epochs, i+1,n_iterations, loss_step/users.shape[0]))

80000 313
epoch 1/4, step 100/313, loss 0.03086385875940323
epoch 1/4, step 200/313, loss 0.029513316228985786
epoch 1/4, step 300/313, loss 0.029406651854515076
epoch 2/4, step 100/313, loss 0.027330242097377777
epoch 2/4, step 200/313, loss 0.030482186004519463
epoch 2/4, step 300/313, loss 0.029925281181931496
epoch 3/4, step 100/313, loss 0.030874887481331825
epoch 3/4, step 200/313, loss 0.02982785925269127
epoch 3/4, step 300/313, loss 0.030129769816994667
epoch 4/4, step 100/313, loss 0.02955903857946396
epoch 4/4, step 200/313, loss 0.0291665717959404
epoch 4/4, step 300/313, loss 0.03188612312078476


  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)


In [34]:
userid = 244
item_missig = 51
userid = torch.LongTensor([userid])
itemid = torch.LongTensor([item_missig])

with torch.no_grad():
    rating_pred = model(userid, itemid)
    print("Predicted rating: {}".format(rating_pred))

Predicted rating: 0.2023475468158722
