In [1]:
# Re-import edited project modules automatically before each cell executes.
%reload_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the cell output.
%matplotlib inline

In [2]:
import pandas as pd
import numpy as np

In [3]:
cd ..

C:\Projects\python\recommender


In [4]:
import torch as T
import torch.nn as nn
import torch.optim as optim

from datasets.torch_movielen import TorchMovielen10k

## Init Dataloader instance

In [5]:
# Device used for the data bunch. Keeps the original value (index of the
# current CUDA device) when a GPU is present, and falls back to the CPU instead
# of raising when CUDA is unavailable.
# NOTE(review): assumes `databunch.device` accepts the string 'cpu' as well as
# a CUDA device index - confirm against TorchMovielen10k.
DEVICE = T.cuda.current_device() if T.cuda.is_available() else 'cpu'

In [6]:
# Build the data bunch from the raw MovieLens ratings file.
# NOTE(review): user_min / item_min look like minimum-interaction filters (the
# log output reports "Filter user size" / "Filter item size") - confirm in
# TorchMovielen10k.
databunch = TorchMovielen10k('./inputs/ml-100k/u.data', user_min=4, item_min=4)
# Configure the data bunch; presumably batch size, target device, shuffling
# and number of loader worker processes - verify against the class.
databunch.batch(1100)
databunch.device(DEVICE)
databunch.shuffle(False)
databunch.workers(0)

2019-09-02 16:22:16,948 - C:\Projects\python\recommender\utils.py - INFO - Read dataset in ./inputs/ml-100k/u.data
I0902 16:22:16.948872 14424 torch_movielen.py:45] Read dataset in ./inputs/ml-100k/u.data
2019-09-02 16:22:16,958 - C:\Projects\python\recommender\utils.py - INFO - Original user size: 943
I0902 16:22:16.958889 14424 torch_movielen.py:49] Original user size: 943
2019-09-02 16:22:16,962 - C:\Projects\python\recommender\utils.py - INFO - Original item size: 1682
I0902 16:22:16.962880 14424 torch_movielen.py:50] Original item size: 1682
2019-09-02 16:22:16,967 - C:\Projects\python\recommender\utils.py - INFO - Filter user size: 943
I0902 16:22:16.967866 14424 torch_movielen.py:56] Filter user size: 943
2019-09-02 16:22:16,969 - C:\Projects\python\recommender\utils.py - INFO - Filter item size: 1413
I0902 16:22:16.969860 14424 torch_movielen.py:57] Filter item size: 1413
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: ht

In [7]:
# One dataloader per split, requested in train / valid / test order.
train_dl, valid_dl, test_dl = (
    databunch.get_dataloader(dataset_type=split)
    for split in ('train', 'valid', 'test')
)
# Iterator over the training loader, used to pull a sample batch.
train_it = iter(train_dl)

In [8]:
# Pull one (positive, negative) batch pair from the training iterator.
# The builtin next() is used instead of the Python-2 style `.next()` method,
# which Python 3 iterators are not required to provide.
pos_batch, neg_batch = next(train_it)
print("positive batch sample: {}".format(pos_batch))
print("negative batch sample: {}".format(neg_batch))

positive batch sample: tensor([[ 258,  254,    0],
        [ 258,  285,  197],
        [ 258,  297, 1100],
        ...,
        [ 755,   87,   59],
        [ 755,  734,  388],
        [ 892,   10,  943]])
negative batch sample: tensor([[ 258,  228,    0],
        [ 258,  581,  197],
        [ 258,  611, 1100],
        ...,
        [ 755,  710,   59],
        [ 755,  467,  388],
        [ 892,  878,  943]])


## Init and test FM model

In [9]:
# Category metadata exposed by the data bunch; passed to TorchFM when the
# model is built.
cat_dict, pos_names, neg_names = (
    databunch.cat_dict,
    databunch.pos_cat_names,
    databunch.neg_cat_names,
)
# NOTE(review): the TorchFM(...) call in this notebook passes num_dim=10 as a
# literal, so this value is not what the model uses - confirm which is intended.
num_dim = 32

In [10]:
from models.torch_fm import TorchFM, FMLearner

In [11]:
# Instantiate the FM model; the recorded repr shows width-1 linear embeddings
# and width-10 factor embeddings per categorical field.
# NOTE(review): the `num_dim` variable defined earlier (32) is not used here -
# confirm whether num_dim=10 is intentional.
model = TorchFM(cat_dict, pos_names, neg_names, num_dim=10)
# Display the module structure.
model

TorchFM(
  (emb_linear_layer): ModuleList(
    (0): Embedding(943, 1)
    (1): Embedding(1413, 1)
    (2): Embedding(1415, 1)
  )
  (emb_factor_layer): ModuleList(
    (0): Embedding(943, 10)
    (1): Embedding(1413, 10)
    (2): Embedding(1415, 10)
  )
)

In [12]:
# Forward pass on the sample batch; the model returns one score tensor for the
# positive rows and one for the negative rows.
pos_preds, neg_preds = model(pos_batch, neg_batch)

In [13]:
# Show the positive scores, then the negative scores, one tensor per line.
print(pos_preds, neg_preds, sep="\n")

tensor([ -0.6241,  52.6917,  15.7346,  ..., -10.4479, -15.5627, -11.7775],
       grad_fn=<SqueezeBackward0>)
tensor([  1.6336,  19.7692,  41.5251,  ...,   4.2171, -13.3173, -11.7502],
       grad_fn=<SqueezeBackward0>)


## Init and test Learner functions

In [14]:
# Adam with its default hyper-parameters over every model parameter.
op = optim.Adam(model.parameters())
# Step scheduler with gamma=1.0: the learning rate is multiplied by 1 at each
# step boundary, so it stays constant.
scheduler = optim.lr_scheduler.StepLR(
    op,
    step_size=1000,
    gamma=1.0,
)

In [15]:
# Check that CUDA is available
T.cuda.is_available()

True

In [16]:
# Wire model, optimizer, scheduler and data bunch into the learner. The
# notebook-wide DEVICE is reused (rather than querying CUDA again) so the
# learner and the data bunch are guaranteed to agree on the device.
learner = FMLearner(model, op, scheduler, databunch, DEVICE)

In [17]:
# Evaluate the learner's criterion on the sample predictions with a linear
# regularisation weight of 0.001.
# NOTE(review): the name suggests a BPR (pairwise ranking) loss - confirm in FMLearner.
bprloss = learner.criterion(pos_preds, neg_preds, linear_reg=0.001)
bprloss

tensor([6663.6865], grad_fn=<NegBackward>)

In [18]:
# Test accuracy functions
# 1.0 where the positive score exceeds the paired negative score, else 0.0.
binary_ranks = ((pos_preds - neg_preds) > 0).float()
# First column of every positive row (presumably the user id - confirm).
users_index = pos_batch[:, 0]
# Distinct values in that column and how many rows each one has, as floats.
users, user_counts = T.unique(users_index, return_counts=True)
user_counts = user_counts.float()

In [19]:
# Show the per-row user column, the per-user row counts and the distinct users.
print(
    *(
        "{} {}".format(label, value)
        for label, value in (
            ("user index: ", users_index),
            ("user counts: ", user_counts),
            ("users: ", users),
        )
    ),
    sep="\n",
)

user index:  tensor([258, 258, 258,  ..., 755, 755, 892])
user counts:  tensor([134., 106.,   7.,   8.,  21., 361.,  29.,  35.,  21.,  67.,  89.,  60.,
         34.,  60.,  22.,  45.,   1.])
users:  tensor([ 22, 118, 156, 194, 258, 275, 290, 531, 593, 639, 711, 755, 816, 820,
        850, 892, 912])


In [20]:
# Shape of the learner's per-user counter tensor.
learner.user_counts.shape

torch.Size([943])

In [21]:
# Manually add this batch's per-user row counts to the learner's counters.
# NOTE(review): this mutates learner state in place, so re-running the cell
# adds the counts again; the later update_hit_counts call appears to add them
# once more (the recorded values double) - confirm the counters are reset
# before real training.
learner.user_counts[users] += user_counts

In [22]:
# Inspect the learner's counters for the users present in the sample batch.
learner.user_counts[users]

tensor([134., 106.,   7.,   8.,  21., 361.,  29.,  35.,  21.,  67.,  89.,  60.,
         34.,  60.,  22.,  45.,   1.])

In [23]:
# Inspect the learner's per-user hit tally for the same users.
learner.hit_per_user[users]

tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [24]:
# Let the learner update its own statistics from the sample batch and its
# predictions, then re-inspect the per-user counts.
learner.update_hit_counts(pos_batch, pos_preds, neg_preds)
learner.user_counts[users]

tensor([268., 212.,  14.,  16.,  42., 722.,  58.,  70.,  42., 134., 178., 120.,
         68., 120.,  44.,  90.,   2.])

In [25]:
# Per-user hit tally after the update_hit_counts call.
learner.hit_per_user[users]

tensor([ 55.,  47.,   2.,   5.,  10., 177.,  12.,  13.,  12.,  33.,  44.,  25.,
         19.,  27.,  13.,  16.,   1.])

In [30]:
# Regularisation term with both weights set to 1.
# NOTE(review): presumably an L2 penalty over the linear and factor embeddings -
# confirm in FMLearner.compute_l2_term.
learner.compute_l2_term(linear_reg=1, factor_reg=1)

tensor([41564.7422])

In [34]:
# Number of items in the dataset behind the training dataloader.
len(train_dl.dataset)

97657

## Test Fit function

In [26]:
# Call the learner's fit method with argument 1 (presumably one epoch).
# NOTE(review): the recorded run of this cell ends in a RuntimeError about a
# CUDA/CPU backend mismatch on an 'index' argument - presumably a tensor used
# for indexing inside fit is not on the same device as its target; verify in
# FMLearner. The execution counts around this cell are also out of order
# (In [30], In [34], then In [26]), so re-check on a fresh kernel.
learner.fit(1)

  0%|                                                                                                                                                                      | 0/1 [00:00<?, ?it/s]

Epoch: 0





RuntimeError: Expected object of backend CUDA but got backend CPU for argument #3 'index'