In [1]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [2]:
from pathlib import Path

import pandas as pd
import numpy as np

In [3]:
cd ..

C:\Projects\python\recommender


In [4]:
import torch as T
import torch.nn as nn
import torch.optim as optim

from datasets import MovelenDataset, TorchMovielen10k

## Init Dataloader instance

In [5]:
DEVICE = T.device('cpu')
BATCH = 32
SHUFFLE = False
WORKERS = 0
FILE_PATH = Path("./inputs/ml-100k/u.data")

In [6]:
databunch = TorchMovielen10k(FILE_PATH, user_min=4, item_min=4)
databunch.batch(BATCH)
databunch.device(DEVICE)
databunch.shuffle(SHUFFLE)
databunch.workers(WORKERS)

2019-09-03 21:52:53,312 - C:\Projects\python\recommender\utils.py - INFO - Read dataset in inputs\ml-100k\u.data
I0903 21:52:53.312215 11580 torch_movielen.py:41] Read dataset in inputs\ml-100k\u.data
2019-09-03 21:52:53,324 - C:\Projects\python\recommender\utils.py - INFO - Original user size: 943
I0903 21:52:53.324210 11580 torch_movielen.py:45] Original user size: 943
2019-09-03 21:52:53,327 - C:\Projects\python\recommender\utils.py - INFO - Original item size: 1682
I0903 21:52:53.327176 11580 torch_movielen.py:46] Original item size: 1682
2019-09-03 21:52:53,333 - C:\Projects\python\recommender\utils.py - INFO - Filter user size: 943
I0903 21:52:53.333160 11580 torch_movielen.py:52] Filter user size: 943
2019-09-03 21:52:53,336 - C:\Projects\python\recommender\utils.py - INFO - Filter item size: 1413
I0903 21:52:53.336151 11580 torch_movielen.py:53] Filter item size: 1413
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http:/

In [7]:
train_dl = databunch.get_dataloader(dataset_type='train')
train_it = iter(train_dl)

In [8]:
users, pos_batch, neg_batch = train_it.next()
print("positive batch sample: {}".format(pos_batch.shape))
print("negative batch sample: {}".format(neg_batch.shape))
print("users shape: {}".format(users.shape))

positive batch sample: torch.Size([32, 3769])
negative batch sample: torch.Size([32, 3769])
users shape: torch.Size([32])


## Init and test FM model

In [9]:
feat_dim = databunch.feat_dim
num_dim = 32
init_mean = 1

In [10]:
from models import TorchFM, FMLearner

In [11]:
model = TorchFM(feat_dim, num_dim, init_mean)
model

TorchFM()

In [12]:
for p in model.parameters():
    print(p)

Parameter containing:
tensor([[-0.3057],
        [-0.7673],
        [ 0.5171],
        ...,
        [-0.1742],
        [ 0.0964],
        [-0.5015]], dtype=torch.float64, requires_grad=True)
Parameter containing:
tensor([[ 0.4152,  0.7864, -0.8384,  ..., -0.6606,  0.5350,  0.9923],
        [ 0.7903, -0.6261,  0.1036,  ...,  0.5113, -0.3935, -0.5454],
        [ 0.8182,  0.6280,  0.4224,  ..., -0.8660, -0.9848, -0.3496],
        ...,
        [-0.2492, -0.9904, -0.3341,  ..., -0.9930,  0.3931,  0.6805],
        [-0.4864,  0.6287, -0.7958,  ..., -0.5035,  0.8540, -0.5516],
        [ 0.6106,  0.4347, -0.3553,  ..., -0.9837, -0.1213, -0.3209]],
       dtype=torch.float64, requires_grad=True)


In [13]:
user_index, pos_feats, neg_feats = train_it.next()

In [14]:
pos_preds, neg_preds = model(pos_batch, neg_batch)

In [15]:
print(pos_preds.size())
print(neg_preds.size())

torch.Size([32])
torch.Size([32])


In [16]:
pos_preds.device

device(type='cpu')

## Init and test Learner functions

In [17]:
op = optim.Adam(model.parameters(), lr=0.001)
scheduler = optim.lr_scheduler.StepLR(op, step_size=1000, gamma=1.)

In [18]:
learner = FMLearner(model, op, scheduler, databunch)

In [19]:
bprloss = learner.criterion(pos_preds, neg_preds, linear_reg=0.001, factor_reg=0.001)
bprloss

tensor([346.3287], grad_fn=<NegBackward>)

In [20]:
# Test accuarcy functions
binary_ranks = (pos_preds - neg_preds) > 0
binary_ranks = binary_ranks.to(T.float)
users, user_counts = T.unique(user_index, return_counts=True)
user_counts = user_counts.to(T.float)

In [21]:
print("user index: ", user_index)
print("user counts: ", user_counts)
print("users: ", users)

user index:  tensor([711, 711, 711, 711, 711, 711, 711, 711, 711, 711, 711, 711, 711, 711,
        711, 711, 711, 711, 711, 711, 711, 711, 711, 711, 711, 711, 711, 711,
        711, 711, 711, 711])
user counts:  tensor([32.])
users:  tensor([711])


In [22]:
print("user index shape:", user_index.shape)
print("binary rank shape:", binary_ranks.shape)

user index shape: torch.Size([32])
binary rank shape: torch.Size([32])


In [24]:
learner.hit_per_user.scatter_add_(0, user_index, binary_ranks)
learner.hit_per_user[users]

tensor([18.])

In [25]:
learner.user_counts.size()

torch.Size([943])

In [26]:
learner.user_counts[users] += user_counts

In [27]:
learner.user_counts[users]

tensor([32.])

In [28]:
learner.hit_per_user[users]

tensor([18.])

In [29]:
learner.update_hit_counts(user_index, pos_preds, neg_preds)
learner.user_counts[users]

tensor([64.])

In [25]:
learner.hit_per_user[users]

tensor([ 55.,  47.,   2.,   5.,  10., 177.,  12.,  13.,  12.,  33.,  44.,  25.,
         19.,  27.,  13.,  16.,   1.])

In [30]:
learner.compute_l2_term(linear_reg=1, factor_reg=1)

tensor([41517.1094], grad_fn=<AddBackward0>)

In [34]:
len(train_dl.dataset)

97657

## Test Fit function

In [26]:
learner.fit(1)

  0%|                                                                                                                                                                      | 0/1 [00:00<?, ?it/s]

Epoch: 0





RuntimeError: Expected object of backend CUDA but got backend CPU for argument #3 'index'