# Matrix factorization in PyTorch

ref: https://www.ethanrosenthal.com/2017/06/20/matrix-factorization-in-pytorch/

In [None]:
# Import modules

import numpy as np
import pandas as pd

import scipy.sparse
import torch

In [None]:
# Read the MovieLens ratings CSV into a DataFrame and preview its first rows.

df_ratings = pd.read_csv(filepath_or_buffer='./data/ml-latest-small/ratings.csv')
df_ratings.head(n=5)

In [None]:
# Tally the distinct users and distinct movies, plus the total number of
# rating rows, then show the (users, items) pair.

n_users = df_ratings['userId'].nunique()
n_items = df_ratings['movieId'].nunique()
n_ratings = df_ratings.shape[0]

print('(%d, %d)' % (n_users, n_items))

In [None]:
# Build the dense user x movie rating matrix as a NumPy array.
# Rows follow the sorted userId values, columns the sorted movieId values,
# and every (user, movie) pair without a rating is filled with 0.

ratings = df_ratings.pivot_table(
    values='rating',
    index='userId',
    columns='movieId',
    fill_value=0,
).to_numpy()
ratings

In [None]:
class MatrixFactorization(torch.nn.Module):
    """Score a (user, item) pair as the dot product of two learned embeddings.

    Every user index and every item index owns one learnable vector of length
    ``n_factors``; the prediction for a pair is the dot product of the two
    vectors that the indices select.

    Args:
        n_users: Number of rows in the user embedding table.
        n_items: Number of rows in the item embedding table.
        n_factors: Length of each embedding vector.
    """

    def __init__(self, n_users: int, n_items: int, n_factors: int = 20) -> None:
        super().__init__()

        # sparse=True makes the embedding weights receive sparse gradients,
        # so the optimizer used with this model must support sparse gradients
        # (torch.optim.SGD does).
        self.user_factors = torch.nn.Embedding(n_users,
                                               n_factors,
                                               sparse=True)

        self.item_factors = torch.nn.Embedding(n_items,
                                               n_factors,
                                               sparse=True)

    def forward(self, user: torch.Tensor, item: torch.Tensor) -> torch.Tensor:
        """Return the predicted score for each (user, item) index pair.

        Args:
            user: Integer tensor of user indices.
            item: Integer tensor of item indices, same shape as ``user``.

        Returns:
            A tensor with the same shape as the index tensors, holding the
            dot product of the selected user and item embeddings.
        """
        # Reduce over the last axis, which is always the factor axis of the
        # embedding output. This matches the previous ``sum(1)`` for 1-D
        # batches and also works for scalar or multi-dimensional indices.
        return (self.user_factors(user) * self.item_factors(item)).sum(dim=-1)

In [None]:
# Instantiate the matrix-factorization model with 20 latent factors.

model = MatrixFactorization(n_users=n_users, n_items=n_items, n_factors=20)
model

In [None]:
# Mean squared error between predicted and observed ratings
# (averaged over the elements, which is the MSELoss default).

loss_func = torch.nn.MSELoss(reduction='mean')
loss_func

In [None]:
# Plain stochastic gradient descent over every model parameter,
# using a learning rate of 1e-6.

optimizer = torch.optim.SGD(params=model.parameters(), lr=1e-6)
optimizer

In [None]:
# Row and column indices of the non-zero entries of the rating matrix,
# i.e. the observed ratings (unrated pairs were filled with 0).
rows, cols = ratings.nonzero()

# Shuffle the indices so the samples are visited in random order
p = np.random.permutation(len(rows))
rows, cols = rows[p], cols[p]

# Training loop: one SGD step per observed rating
for i, (row, col) in enumerate(zip(rows, cols)):
    # Turn data into tensors (batch of size one)
    rating = torch.tensor([ratings[row, col]], dtype=torch.float32)
    row = torch.tensor([row], dtype=torch.long)
    col = torch.tensor([col], dtype=torch.long)

    # Predict and calculate loss
    prediction = model(row, col)
    loss = loss_func(prediction, rating)

    # Debug print
    if i % 500 == 0:
        print(i, loss.item())

    # Clear the gradients left over from the previous sample; without this,
    # backward() keeps adding to them and every step would apply the sum of
    # all gradients seen so far instead of the current sample's gradient.
    optimizer.zero_grad()

    # Backpropagate
    loss.backward()

    # Update the parameters
    optimizer.step()