In [None]:
"""

https://github.com/yihong-chen/neural-collaborative-filtering/tree/master/src

"""

In [1]:
import torch
import math
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, TensorDataset
import pandas as pd
from sklearn.model_selection import train_test_split
import numpy as np
from torch.utils.data import DataLoader

In [5]:
file = "../ml-100k/u.data"

df = pd.read_csv(file, sep='\t', header=None)
df.columns = "uid iid rating timestamp".split()
df.drop('timestamp', axis=1, inplace=True)
print(df.head())


   uid  iid  rating
0  196  242       3
1  186  302       3
2   22  377       1
3  244   51       2
4  166  346       1


In [6]:
n_users = df['uid'].nunique()
n_items = df['iid'].nunique()
print("num users {}, num items {}".format(n_users, n_items))

num users 943, num items 1682


In [7]:
X = df.iloc[:, :-1].values
target = df.iloc[:, -1].values

print(type(X))
print(type(target))
print(X[:10])
print(target[:10])
print(X[0][0], X[0][1], target[0])

<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
[[196 242]
 [186 302]
 [ 22 377]
 [244  51]
 [166 346]
 [298 474]
 [115 265]
 [253 465]
 [305 451]
 [  6  86]]
[3 3 1 2 1 4 2 5 3 3]
196 242 3


In [8]:
X_train, X_test, y_train, y_test = train_test_split(X, target, test_size=0.20, random_state=42)
print(X_train.shape)
print(y_train.shape)
print(X_test.shape)
print(y_test.shape)


(80000, 2)
(80000,)
(20000, 2)
(20000,)


In [9]:
print(X_train[:10])
print(X_train[:, 0])
print(X_train[:, 1])

[[ 807 1411]
 [ 474  659]
 [ 463  268]
 [ 139  286]
 [ 621  751]
 [ 264  137]
 [ 262  219]
 [   7  300]
 [ 653  393]
 [ 235  198]]
[807 474 463 ... 437 284 222]
[1411  659  268 ...  475  322  200]


In [10]:
print(y_train)
rating = torch.from_numpy(y_train).view(-1, 1)
print(rating)


[1 5 4 ... 3 3 3]
tensor([[1],
        [5],
        [4],
        ...,
        [3],
        [3],
        [3]])


In [11]:
class MovieLens(Dataset):

    def __init__(self, X, y):
        self.users = torch.from_numpy(X[:, 0]).type(torch.LongTensor)
        self.items = torch.from_numpy(X[:, 1]).type(torch.LongTensor)
        self.ratings = torch.from_numpy(y).view(-1, 1).type(torch.FloatTensor)
        self.n_samples = self.users.shape[0]

    def __getitem__(self, index):
        return self.users[index], self.items[index], self.ratings[index]

    def __len__(self):
        return self.n_samples

In [33]:
bs = 256

train_dataset = MovieLens(X_train, y_train)
train_dataloader = DataLoader(dataset=train_dataset, batch_size=bs, shuffle=True, num_workers=2)

In [34]:
test_dataset = MovieLens(X_test, y_test)
test_dataloader = DataLoader(dataset=test_dataset, batch_size=bs, shuffle=False, num_workers=2)

In [35]:
train_dataiter = iter(train_dataloader)
data = train_dataiter.next()
users, items, ratings = data
print(users)
print(items)
print(ratings)

tensor([533, 659, 374, 780,  95, 414, 374,  96, 234, 528, 928, 940, 385, 707,
         60, 221, 100, 669, 669,  60, 883, 802, 618, 429,  43, 174, 664, 457,
        911,  82,   7, 843, 794,  42, 480,  60, 486, 627, 625, 848, 407,  84,
        647, 116, 181, 321, 295, 901, 628, 545, 729,  12, 626, 552,   7, 290,
        880,  62, 174,  82, 456, 850, 788, 378,  58, 381, 334, 541,  95, 137,
        618, 385, 316, 254, 417, 719, 314, 835, 601, 625, 293, 243, 449, 315,
        931, 931, 497, 326, 874, 839, 620, 283, 330,  94, 790, 921, 500, 254,
        121,  12,  92, 312, 542, 383, 379, 305, 891, 210, 545, 345, 865, 499,
         75, 481,   5, 440, 682,   5, 185, 230, 540,  27, 342, 423, 456,  59,
        192, 488, 320, 690, 394, 457, 236,  28, 416, 215, 181, 826, 328, 548,
        769, 276, 316, 331, 825, 886, 267, 457, 354, 136, 130, 405, 766, 661,
        476, 547, 848, 406, 276, 915, 653, 907,   5, 839,  59, 429, 725, 672,
        308, 318, 361, 485, 256, 223, 562,  13, 279, 782, 653, 3

In [36]:
test_dataiter = iter(test_dataloader)
data = test_dataiter.next()
users, items, ratings = data
print(users)
print(items)
print(ratings)


tensor([877, 815,  94, 416, 500, 259, 598, 886, 837, 521, 459, 622, 655, 128,
        308, 930,  43,  42, 450, 521, 698,  13, 796, 308, 322, 276, 537, 933,
        852, 416, 859, 892, 378, 350, 393, 240,  13, 165, 184, 296, 213, 217,
        515, 487, 416, 455, 712, 748, 313,  77, 474, 373, 312, 919, 698, 629,
        868, 899, 847,  59, 634, 516, 357, 883, 533, 666, 387, 716, 503, 328,
         26, 533, 611, 303, 327, 301, 577, 749, 727, 589, 399, 766, 235, 861,
        648, 938, 593, 837, 916, 410, 634, 721, 381, 120, 848, 854, 896, 539,
        268, 185, 747, 526, 667, 717,  49, 592, 330, 189,  83, 904, 705, 846,
        851, 764, 868, 886, 823, 449,  59, 634,  10, 543, 905, 450, 463, 463,
        429, 406, 318, 560, 429, 327, 427, 483, 911, 405, 227, 870, 493, 320,
        161, 128, 698, 125, 555, 509,   2, 679, 880,  83, 880, 446, 171,   6,
        772, 631, 293, 305, 563, 348, 524, 693, 682, 427, 326, 350, 437, 291,
        660, 132, 177,  42, 883, 843, 544, 935, 329, 908, 627, 4

In [37]:
embed_dim= 16
bias_dim = 1
ratings_range = (0, 5.5)

In [38]:
### This is from the github repository

class NCF(nn.Module):
    def __init__(self, num_users, num_items, latent_dim):
        super(NCF, self).__init__()
        self.num_users = num_users+1    # +1 because they start at 1
        self.num_items = num_items+1    # +1 because they satart at 1
        self.latent_dim = latent_dim

        self.embedding_user = nn.Embedding(num_embeddings=self.num_users, embedding_dim=self.latent_dim)
        self.embedding_item = nn.Embedding(num_embeddings=self.num_items, embedding_dim=self.latent_dim)

        self.affine_output = nn.Linear(in_features=self.latent_dim, out_features=1)
        self.logistic = nn.Sigmoid()


    def forward(self, user_indices, item_indices):
        user_embedding = self.embedding_user(user_indices)
        item_embedding = self.embedding_item(item_indices)
        element_product = torch.mul(user_embedding, item_embedding)
        logits = self.affine_output(element_product)
        rating = self.logistic(logits)

        return rating


In [39]:
model = NCF(n_users, n_items, embed_dim)

In [40]:
learning_rate = 0.001
criterion = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=learning_rate)

In [42]:
num_epochs = 5
total_samples = len(train_dataset)
n_iterations = math.ceil(total_samples / bs)
print(total_samples, n_iterations)

for epoch in range(num_epochs):
    for i, (users, items, ratings) in enumerate(train_dataloader):
        loss_step = 0.0
        rating_preds = model(users, items)
        #print(rating_preds.type())
        #print(ratings.type())
        #print()
        #print()
        optimizer.zero_grad()
        loss = criterion(rating_preds, ratings)
        loss.backward()
        optimizer.step()

        loss_step += loss.item()

        if (i+1) % 100 == 0:
            print("epoch {}/{}, step {}/{}, loss {}".format(
                        epoch+1, num_epochs, i+1,n_iterations, loss_step/users.shape[0]))

80000 313
epoch 1/5, step 100/313, loss 0.03364389389753342
epoch 1/5, step 200/313, loss 0.03225594013929367
epoch 1/5, step 300/313, loss 0.03213614225387573
epoch 2/5, step 100/313, loss 0.030468609184026718
epoch 2/5, step 200/313, loss 0.0321945995092392
epoch 2/5, step 300/313, loss 0.03212772682309151
epoch 3/5, step 100/313, loss 0.032438457012176514
epoch 3/5, step 200/313, loss 0.033875204622745514
epoch 3/5, step 300/313, loss 0.033552736043930054
epoch 4/5, step 100/313, loss 0.03097473829984665
epoch 4/5, step 200/313, loss 0.031215116381645203
epoch 4/5, step 300/313, loss 0.03274260461330414
epoch 5/5, step 100/313, loss 0.029919710010290146
epoch 5/5, step 200/313, loss 0.03254241123795509
epoch 5/5, step 300/313, loss 0.03041829727590084


Exception ignored in: <bound method _MultiProcessingDataLoaderIter.__del__ of <torch.utils.data.dataloader._MultiProcessingDataLoaderIter object at 0x7fc7563524a8>>
Traceback (most recent call last):
  File "/media/eduardo/SSD_DATA/Appl ML Projects/venv/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 962, in __del__
    self._shutdown_workers()
  File "/media/eduardo/SSD_DATA/Appl ML Projects/venv/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 942, in _shutdown_workers
    w.join()
  File "/usr/lib/python3.6/multiprocessing/process.py", line 122, in join
    assert self._parent_pid == os.getpid(), 'can only join a child process'
AssertionError: can only join a child process
Exception ignored in: <bound method _MultiProcessingDataLoaderIter.__del__ of <torch.utils.data.dataloader._MultiProcessingDataLoaderIter object at 0x7fc756024f98>>
Traceback (most recent call last):
Exception ignored in: <bound method _MultiProcessingDataLoaderIter.__del__ of <t

In [43]:
userid = 1
item_missig = 2
userid = torch.LongTensor([userid])
itemid = torch.LongTensor([item_missig])

with torch.no_grad():
    rating_pred = model(userid, itemid)
    print("Predicted rating: {}".format(rating_pred))

Predicted rating: tensor([[0.9270]])


In [28]:
with torch.no_grad():
    for i, (users, items, ratings) in enumerate(test_dataloader):
        rating_pred = model(users, items)
        print("rating: {}, prediction:{}".format(ratings, rating_pred))

rating: tensor([[4.],
        [3.],
        [4.],
        [2.],
        [2.],
        [3.],
        [5.],
        [4.],
        [3.],
        [4.],
        [4.],
        [4.],
        [3.],
        [4.],
        [3.],
        [3.],
        [2.],
        [3.],
        [4.],
        [3.],
        [4.],
        [2.],
        [3.],
        [3.],
        [3.],
        [5.],
        [4.],
        [5.],
        [3.],
        [5.],
        [3.],
        [4.],
        [3.],
        [3.],
        [3.],
        [4.],
        [3.],
        [5.],
        [4.],
        [5.],
        [5.],
        [5.],
        [4.],
        [2.],
        [3.],
        [4.],
        [3.],
        [3.],
        [3.],
        [2.],
        [5.],
        [3.],
        [5.],
        [4.],
        [4.],
        [5.],
        [4.],
        [3.],
        [1.],
        [3.],
        [4.],
        [3.],
        [5.],
        [5.],
        [4.],
        [3.],
        [2.],
        [2.],
        [3.],
        [3.],
        [3.]