In [1]:
import numpy as np
import pandas as pd
from scipy.sparse import rand as sprand
import torch
import torch.nn as nn
import torch.nn.functional as F

# Make up some random explicit feedback ratings
# and convert to a dense numpy array.
n_users = 1000
n_items = 1000

# Fixed seed and a dedicated generator so the synthetic data is identical on
# every "Restart Kernel -> Run All" instead of changing from run to run.
RATINGS_SEED = 0
ratings_rng = np.random.RandomState(RATINGS_SEED)

# Roughly 1% of the (user, item) cells receive a rating; every other cell
# stays at 0.
ratings = sprand(n_users, n_items,
                 density=0.01, format='csr',
                 random_state=ratings_rng)

# Overwrite the stored values with integer-valued ratings. The upper bound of
# randint is exclusive, so the ratings are drawn from {1, 2, 3, 4}.
ratings.data = (ratings_rng.randint(1, 5,
                                    size=ratings.nnz)
                           .astype(np.float64))
ratings = ratings.toarray()

In [2]:
# Preview the first five rows of the dense ratings matrix as a DataFrame.
pd.DataFrame(data=ratings).head(n=5)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,990,991,992,993,994,995,996,997,998,999
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [3]:
# Dimensions of the dense ratings matrix.
np.shape(ratings)

(1000, 1000)

In [2]:
class MatrixFactorization(torch.nn.Module):
    """Matrix factorization without bias terms.

    Holds one embedding table for users and one for items, each with
    ``n_factors`` columns. The prediction for a (user, item) pair is the
    dot product of the two corresponding embedding rows.

    Args:
        n_users: number of rows in the user embedding table.
        n_items: number of rows in the item embedding table.
        n_factors: width of both embedding tables (default 20).
    """

    def __init__(self, n_users, n_items, n_factors=20):
        super().__init__()
        # Both tables share the same width and both request sparse gradients.
        shared = dict(embedding_dim=n_factors, sparse=True)
        self.user_factors = torch.nn.Embedding(num_embeddings=n_users, **shared)
        self.item_factors = torch.nn.Embedding(num_embeddings=n_items, **shared)

    def forward(self, user, item):
        """Return the dot product of the looked-up user and item rows.

        ``user`` and ``item`` are index tensors passed to the embedding
        tables; the element-wise product is summed over dimension 1.
        """
        user_vecs = self.user_factors(user)
        item_vecs = self.item_factors(item)
        return torch.sum(user_vecs * item_vecs, dim=1)

In [3]:
# Instantiate the unbiased factorization model with 20 latent factors.
model = MatrixFactorization(n_users=n_users,
                            n_items=n_items,
                            n_factors=20)

In [4]:
# Mean squared error between predicted and observed ratings.
loss_func = torch.nn.MSELoss(reduction='mean')

In [5]:
# Plain stochastic gradient descent over every parameter of `model`.
optimizer = torch.optim.SGD(
    params=model.parameters(),
    lr=1e-6,  # learning rate
)

In [6]:
# Shuffle the observed (user, item) pairs so that SGD visits them in a
# random order.
rows, cols = ratings.nonzero()
p = np.random.permutation(len(rows))
rows, cols = rows[p], cols[p]

for row, col in zip(rows, cols):
    # Reset gradients first: backward() accumulates into .grad, so without
    # this every step would apply the sum of all gradients computed so far
    # rather than the gradient of the current example.
    optimizer.zero_grad()

    # Turn data into tensors (a batch of exactly one rating)
    rating = torch.FloatTensor([ratings[row, col]])
    row = torch.LongTensor([row])
    col = torch.LongTensor([col])

    # Predict and calculate loss
    prediction = model(row, col)
    loss = loss_func(prediction, rating)

    # Backpropagate
    loss.backward()

    # Update the parameters
    optimizer.step()

In [9]:
# Plain matrix factorization recommender, no bias terms.
class Recommender(nn.Module):
    """Dot-product recommender over user and artist embeddings.

    Args:
        num_users: number of rows in the user embedding table ``u``.
        num_artists: number of rows in the artist embedding table ``a``.
        num_factors: width of both embedding tables.
    """

    def __init__(self, num_users, num_artists, num_factors):
        super().__init__()
        # Create both tables before re-initialising either one, so the
        # random number stream is consumed in a fixed order.
        self.u = nn.Embedding(num_users, num_factors)
        self.a = nn.Embedding(num_artists, num_factors)
        # Replace the default initialisation with small uniform values.
        for table in (self.u, self.a):
            table.weight.data.uniform_(-0.01, 0.01)

    def forward(self, cats, conts):
        """Score each row of ``cats``.

        Column 0 of ``cats`` is used as the user index and column 1 as the
        artist index. ``conts`` is accepted but never read.
        """
        user_idx = cats[:, 0]
        artist_idx = cats[:, 1]
        user_vecs = self.u(user_idx)
        artist_vecs = self.a(artist_idx)
        return (user_vecs * artist_vecs).sum(1)

In [10]:
class BiasedMatrixFactorization(torch.nn.Module):
    """Matrix factorization with per-user and per-item bias terms.

    The prediction for a (user, item) pair is
    ``user_bias + item_bias + dot(user_factors, item_factors)``.

    Args:
        n_users: number of rows in the user factor and user bias tables.
        n_items: number of rows in the item factor and item bias tables.
        n_factors: width of the two factor tables (default 20).
    """

    def __init__(self, n_users, n_items, n_factors=20):
        super().__init__()
        self.user_factors = torch.nn.Embedding(n_users,
                                               n_factors,
                                               sparse=True)
        self.item_factors = torch.nn.Embedding(n_items,
                                               n_factors,
                                               sparse=True)
        # Biases are stored as width-1 embeddings so they are looked up with
        # the same indices as the factors.
        self.user_biases = torch.nn.Embedding(n_users,
                                              1,
                                              sparse=True)
        self.item_biases = torch.nn.Embedding(n_items,
                                              1,
                                              sparse=True)

    def forward(self, user, item):
        """Return one prediction per (user, item) index pair.

        For 1-D index tensors of length ``batch`` the result has shape
        ``(batch,)``, including when ``batch == 1``.
        """
        bias = self.user_biases(user) + self.item_biases(item)
        interaction = (self.user_factors(user)
                       * self.item_factors(item)).sum(dim=1, keepdim=True)
        # Drop only the trailing width-1 dimension. A bare squeeze() would
        # also drop the batch dimension when the batch holds a single pair,
        # returning a 0-dim tensor that no longer matches a (1,) target.
        return (bias + interaction).squeeze(dim=1)

In [11]:
# Despite its name this is an optimizer, not a loss function: SGD over the
# parameters of `model` with a weight-decay term added.
reg_loss_func = torch.optim.SGD(
    params=model.parameters(),
    lr=1e-6,
    weight_decay=1e-5,
)

In [13]:
# Show the optimizer's repr to inspect its hyperparameters.
reg_loss_func

SGD (
Parameter Group 0
    dampening: 0
    lr: 1e-06
    momentum: 0
    nesterov: False
    weight_decay: 1e-05
)

In [12]:
# Despite its name this is an optimizer, not a loss function: Adagrad over
# the parameters of `model`.
adagrad_loss = torch.optim.Adagrad(
    params=model.parameters(),
    lr=1e-6,
)

In [14]:
# Show the optimizer's repr to inspect its hyperparameters.
adagrad_loss

Adagrad (
Parameter Group 0
    eps: 1e-10
    initial_accumulator_value: 0
    lr: 1e-06
    lr_decay: 0
    weight_decay: 0
)