In [35]:
import numpy as np
import pandas as pd
from scipy.sparse import rand as sprand
import torch
import torch.nn as nn
import torch.nn.functional as F

# Make up some random explicit feedback ratings
# and convert to a numpy array
n_users = 1000
n_items = 1000
ratings = sprand(n_users, n_items, 
                 density=0.01, format='csr')
ratings.data = (np.random.randint(1, 5, 
                                  size=ratings.nnz)
                          .astype(np.float64))
ratings = ratings.toarray()

In [4]:
ratings

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [3]:
ratings.shape

(1000, 1000)

In [13]:
from torch.autograd import Variable

In [2]:
class MatrixFactorization(torch.nn.Module):
    
    def __init__(self, n_users, n_items, n_factors=20):
        super().__init__()
        self.user_factors = torch.nn.Embedding(n_users, 
                                               n_factors,
                                               sparse=True)
        self.item_factors = torch.nn.Embedding(n_items, 
                                               n_factors,
                                               sparse=True)
        
    def forward(self, user, item):
        return (self.user_factors(user) * self.item_factors(item)).sum(1)

In [3]:
model = MatrixFactorization(n_users, n_items, n_factors=20)

In [4]:
loss_func = torch.nn.MSELoss()

In [9]:
# optimizer = torch.optim.SGD(model.parameters(), 
#                             lr=1e-6) # learning rate

optimizer = torch.optim.Adagrad(model.parameters(), lr=1e-6)

In [10]:
# Sort our data
n_epochs= 3
rows, cols = ratings.nonzero()
p = np.random.permutation(len(rows))
rows, cols = rows[p], cols[p]
for epoch in range(n_epochs):
    for row, col in zip(*(rows, cols)):
        # Turn data into tensors
        rating = torch.FloatTensor([ratings[row, col]])
        row = torch.LongTensor([row])
        col = torch.LongTensor([col])

        # Predict and calculate loss
        prediction = model(row, col)
        loss = loss_func(prediction, rating)

        # Backpropagate
        loss.backward()

        # Update the parameters
        optimizer.step()
    print(f'loss on epoch {epoch} is {loss}')

loss on epoch 0 is 17.685449600219727
loss on epoch 1 is 17.682336807250977
loss on epoch 2 is 17.68018341064453


In [30]:
# def _shuffle(interactions):

#     users = interactions.row
#     items = interactions.col
#     ratings = interactions.data

#     shuffle_indices = np.arange(len(users))
#     np.random.shuffle(shuffle_indices)

#     return (users[shuffle_indices].astype(np.int64),
#             items[shuffle_indices].astype(np.int64),
#             ratings[shuffle_indices].astype(np.float32))

def _shuffle(interactions):

    rows, cols = ratings.nonzero()
    p = np.random.permutation(len(rows))
    users, items = rows[p], cols[p]
    ratings = ratings.nonzero()

    return (users.astype(np.int64),
            items.astype(np.int64),
            ratings.astype(np.float32))

def _gpu(tensor, gpu=False):

    if gpu:
        return tensor.cuda()
    else:
        return tensor


def _cpu(tensor):

    if tensor.is_cuda:
        return tensor.cpu()
    else:
        return tensor


def _minibatch(tensor, batch_size):

    for i in range(0, len(tensor), batch_size):
        yield tensor[i:i + batch_size]

In [28]:
rows, cols = ratings.nonzero()
p = np.random.permutation(len(rows))
rows, cols = rows[p], cols[p]

In [29]:
p

array([5596, 2780, 9537, ..., 6526, 6244, 7791])

In [42]:
ratings = sprand(n_users, n_items, 
                 density=0.01, format='csr')
ratings.data = (np.random.randint(1, 5, 
                                  size=ratings.nnz)
                          .astype(np.float64))
ratings = ratings.toarray()

In [52]:
_batch_size = 64
_num_users = None
_num_items = 5
_net = None
_use_cuda = False
for epoch_num in range(n_epochs):

    rows, cols = ratings.nonzero()
    p = np.random.permutation(len(rows))
    users, items = rows[p], cols[p]
    ratings = ratings.nonzero()
    ratings = np.asarray(ratings)

    user_ids_tensor = _cpu(torch.from_numpy(users)
                          )
    item_ids_tensor = _cpu(torch.from_numpy(items))
    ratings_tensor = _cpu(torch.from_numpy(ratings)
                         )

    epoch_loss = 0.0

    for (batch_user,
         batch_item,
         batch_ratings) in zip(_minibatch(user_ids_tensor,
                                          _batch_size),
                               _minibatch(item_ids_tensor,
                                          _batch_size),
                               _minibatch(ratings_tensor,
                                          _batch_size)):

        user_var = Variable(batch_user)
        item_var = Variable(batch_item)
        ratings_var = Variable(batch_ratings)

        optimizer.zero_grad()

        loss = _pointwise_loss(user_var, item_var, ratings_var)
        epoch_loss += loss.data[0]

        loss.backward()
        optimizer.step()

    if verbose:
        print('Epoch {}: loss {}'.format(epoch_num, epoch_loss))

TypeError: 'NoneType' object is not callable

# Loss functions

In [51]:
def _pointwise_loss(users, items, ratings):

    negatives = Variable(
        _gpu(
            torch.from_numpy(np.random.randint(0,
                                               _num_items,
                                               len(users))),
            _use_cuda)
    )

    positives_loss = (1.0 - F.sigmoid(_net(users, items)))
    negatives_loss = F.sigmoid(_net(users, negatives))

    return torch.cat([positives_loss, negatives_loss]).mean()

# def _bpr_loss(self, users, items, ratings):

#     negatives = Variable(
#         _gpu(
#             torch.from_numpy(np.random.randint(0,
#                                                self._num_items,
#                                                len(users))),
#             self._use_cuda)
#     )

#     return (1.0 - F.sigmoid(self._net(users, items) -
#                             self._net(users, negatives))).mean()

# def _adaptive_loss(self, users, items, ratings,
#     n_neg_candidates=5):
#     negatives = Variable(
#         _gpu(
#             torch.from_numpy(
#                 np.random.randint(0, self._num_items,
#                     (len(users), n_neg_candidates))),
#             self._use_cuda)
#     )
#     negative_predictions = self._net(
#         users.repeat(n_neg_candidates, 1).transpose_(0,1),
#         negatives
#         ).view(-1, n_neg_candidates)

#     best_negative_prediction, _ = negative_predictions.max(1)
#     positive_prediction = self._net(users, items)

#     return torch.mean(torch.clamp(best_negative_prediction -
#                                   positive_prediction
#                                   + 1.0, 0.0))

# Architectures

In [5]:
# basic matrix factorization model without bias term
class Recommender(nn.Module):
    def __init__(self, num_users, num_artists, num_factors):
        super().__init__()
        self.u = nn.Embedding(num_users, num_factors)
        self.a = nn.Embedding(num_artists, num_factors)
        self.u.weight.data.uniform_(-.01, .01)
        self.a.weight.data.uniform_(-.01, .01)

    def forward(self, cats, conts):
        users, artists = cats[:,0], cats[:,1]
        us, art = self.u(users), self.a(artists)
        return (us*art).sum(1)

In [6]:
class BiasedMatrixFactorization(torch.nn.Module):
    
    def __init__(self, n_users, n_items, n_factors=20):
        super().__init__()
        self.user_factors = torch.nn.Embedding(n_users, 
                                               n_factors,
                                               sparse=True)
        self.item_factors = torch.nn.Embedding(n_items, 
                                               n_factors,
                                               sparse=True)
        self.user_biases = torch.nn.Embedding(n_users, 
                                              1,
                                              sparse=True)
        self.item_biases = torch.nn.Embedding(n_items,
                                              1,
                                              sparse=True)
        
    def forward(self, user, item):
        pred = self.user_biases(user) + self.item_biases(item)
        pred += (self.user_factors(user) * self.item_factors(item)).sum(dim=1, keepdim=True)
        return pred.squeeze()

In [None]:
import torch.nn as nn
class MatrixFactorization(torch.nn.Module):
    
    def __init__(self, n_users, n_items, n_factors=20):
        super().__init__()
        self.user_factors = nn.Embedding(n_users, n_factors, sparse=True)
        self.item_factors = nn.Embedding(n_users, n_factors, sparse=True)
        self.user_biases = nn.Embedding(n_users, 1, sparse=True)
        self.item_biases = nn.Embedding((n_users, 1, sparse=True)
        
    def forward(self, user, item):
        dot = (self.user_factors(user) * self.item_factors(item)).sum(dim=1, keepdim=True)
        bias = self.user_biases(user) + self.item_biases(item)
        return dot + bias

In [None]:
reg_loss_func = torch.optim.SGD(model.parameters(), lr=1e-6,
                                weight_decay=1e-5)

In [13]:
reg_loss_func

SGD (
Parameter Group 0
    dampening: 0
    lr: 1e-06
    momentum: 0
    nesterov: False
    weight_decay: 1e-05
)

In [12]:
adagrad_loss = torch.optim.Adagrad(model.parameters(), lr=1e-6)

In [14]:
adagrad_loss

Adagrad (
Parameter Group 0
    eps: 1e-10
    initial_accumulator_value: 0
    lr: 1e-06
    lr_decay: 0
    weight_decay: 0
)