Implementing Probabilist Matrix Factorization in PyTorch. From this paper: https://papers.nips.cc/paper/3208-probabilistic-matrix-factorization.pdf.

Data from here: https://grouplens.org/datasets/movielens/

In [4]:
import pandas as pd
import torch

In [5]:
ratings = pd.read_csv('ml-latest-small/ratings.csv')

In [6]:
ratings.describe()

Unnamed: 0,userId,movieId,rating,timestamp
count,100836.0,100836.0,100836.0,100836.0
mean,326.127564,19435.295718,3.501557,1205946000.0
std,182.618491,35530.987199,1.042529,216261000.0
min,1.0,1.0,0.5,828124600.0
25%,177.0,1199.0,3.0,1019124000.0
50%,325.0,2991.0,3.5,1186087000.0
75%,477.0,8122.0,4.0,1435994000.0
max,610.0,193609.0,5.0,1537799000.0


In [238]:
rating_matrix = ratings.pivot(index='userId', columns='movieId', values='rating')
n_users, n_movies = rating_matrix.shape
# Scaling ratings to between 0 and 1, this helps our model by constraining predictions
min_rating, max_rating = ratings['rating'].min(), ratings['rating'].max()
rating_matrix = (rating_matrix - min_rating) / (max_rating - min_rating)

In [239]:
# Replacing missing ratings with -1 so we can filter them out later
rating_matrix[rating_matrix.isnull()] = -1
rating_matrix = torch.FloatTensor(rating_matrix.values)

In [258]:
# This is how we can define our feature matrices
# We're going to be training these, so we'll need gradients
latent_vectors = 5
user_features = torch.randn(n_users, latent_vectors, requires_grad=True)
user_features.data.mul_(0.01)
movie_features = torch.randn(n_movies, latent_vectors, requires_grad=True)
movie_features.data.mul_(0.01)

tensor([[-6.5396e-03, -2.3539e-03, -1.8817e-02, -1.5198e-02,  9.0273e-03],
        [-5.4049e-03, -2.4260e-05,  1.0203e-02, -1.2512e-02, -1.4097e-02],
        [ 6.5175e-04, -1.8315e-03,  2.6132e-03, -8.3010e-03,  4.1832e-03],
        ...,
        [ 7.8845e-03,  1.3597e-02, -6.7369e-04, -2.5653e-02,  1.3477e-02],
        [ 9.8239e-03,  4.0798e-03,  1.0044e-02,  1.4503e-02, -2.1100e-03],
        [-2.2349e-02,  4.6519e-03, -1.9321e-03,  1.6966e-02,  1.0361e-02]])

In [259]:
class PMFLoss(torch.nn.Module):
    def __init__(self, lam_u=0.3, lam_v=0.3):
        super().__init__()
        self.lam_u = lam_u
        self.lam_v = lam_v 
    
    def forward(self, matrix, u_features, v_features):
        non_zero_mask = (matrix != -1).type(torch.FloatTensor)
        predicted = torch.sigmoid(torch.mm(u_features, v_features.t()))
        
        diff = (matrix - predicted)**2
        prediction_error = torch.sum(diff*non_zero_mask)

        u_regularization = self.lam_u * torch.sum(u_features.norm(dim=1))
        v_regularization = self.lam_v * torch.sum(v_features.norm(dim=1))
        
        return prediction_error + u_regularization + v_regularization

In [260]:
criterion = PMFLoss()
loss = criterion(rating_matrix, user_features, movie_features)

In [261]:
loss

tensor(8291.1846)

In [262]:
loss.backward()

In [264]:
# Actual training loop now

latent_vectors = 30
user_features = torch.randn(n_users, latent_vectors, requires_grad=True)
user_features.data.mul_(0.01)
movie_features = torch.randn(n_movies, latent_vectors, requires_grad=True)
movie_features.data.mul_(0.01)

pmferror = PMFLoss(lam_u=0.05, lam_v=0.05)
optimizer = torch.optim.Adam([user_features, movie_features], lr=0.01)
for step, epoch in enumerate(range(1000)):
    optimizer.zero_grad()
    loss = pmferror(rating_matrix, user_features, movie_features)
    loss.backward()
    optimizer.step()
    if step % 10 == 0:
        print(f"Step {step}, {loss:.3f}")

Step 0, 8252.765
Step 10, 7135.935
Step 20, 4187.129
Step 30, 3497.064
Step 40, 3078.548
Step 50, 2748.131
Step 60, 2456.536
Step 70, 2197.751
Step 80, 1972.338
Step 90, 1780.675
Step 100, 1619.302
Step 110, 1484.564
Step 120, 1372.859
Step 130, 1280.696
Step 140, 1204.615
Step 150, 1141.495
Step 160, 1088.808
Step 170, 1044.518
Step 180, 1006.979
Step 190, 974.852
Step 200, 947.060
Step 210, 922.785
Step 220, 901.409
Step 230, 882.460
Step 240, 865.547
Step 250, 850.338
Step 260, 836.557
Step 270, 823.975
Step 280, 812.400
Step 290, 801.680
Step 300, 791.728
Step 310, 782.502
Step 320, 773.928
Step 330, 765.920
Step 340, 758.409
Step 350, 751.341
Step 360, 744.669
Step 370, 738.365
Step 380, 732.406
Step 390, 726.762
Step 400, 721.405
Step 410, 716.313
Step 420, 711.466
Step 430, 706.846
Step 440, 702.437
Step 450, 698.223
Step 460, 694.188
Step 470, 690.320
Step 480, 686.604
Step 490, 683.026
Step 500, 679.569
Step 510, 676.229
Step 520, 673.015
Step 530, 669.917
Step 540, 666.932
St

In [271]:
# Checking if our model can reproduce the true user ratings
user_idx = 4
user_ratings = rating_matrix[user_idx, :]
true_ratings = user_ratings != -1
predictions = torch.sigmoid(torch.mm(user_features[user_idx, :].view(1, -1), movie_features.t()))
print("Predictions: \n", (predictions.squeeze()[true_ratings]*(max_rating - min_rating) + min_rating).round())
print("Truth: \n", (user_ratings[true_ratings]*(max_rating - min_rating) + min_rating))

Predictions: 
 tensor([ 4.,  4.,  4.,  4.,  3.,  4.,  5.,  4.,  3.,  3.,  4.,  5.,
         3.,  4.,  3.,  1.,  5.,  5.,  3.,  2.,  3.,  3.,  3.,  2.,
         3.,  4.,  2.,  3.,  4.,  4.,  5.,  3.,  5.,  4.,  3.,  4.,
         3.,  4.,  2.,  5.,  5.,  5.,  3.,  3.])
Truth: 
 tensor([ 4.,  4.,  4.,  4.,  3.,  4.,  5.,  4.,  3.,  3.,  4.,  5.,
         3.,  4.,  3.,  1.,  5.,  5.,  3.,  2.,  3.,  3.,  3.,  2.,
         3.,  4.,  2.,  3.,  4.,  4.,  5.,  3.,  5.,  4.,  3.,  4.,
         3.,  5.,  3.,  5.,  5.,  5.,  3.,  3.])
