# Implementing Matrix Factorization (MF) with PyTorch

In [3]:
import torch
import pandas as pd
import torch.nn.functional as F
import matplotlib.pyplot as plt
from torch import nn
from torch.utils.data import Dataset, DataLoader

## Load MovieLens dataset

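Dataset source: https://grouplens.org/datasets/movielens/ — this notebook reads the `ua.base` (train) and `ua.test` (test) files from the `ml-100k` directory.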

In [6]:
class MovieLensDataset(Dataset):
    """Map-style dataset of (user, movie, rating) triples read from a tab-separated file.

    The file is parsed without a header row into the columns
    ``user``, ``movie``, ``rating`` and ``timestamp``; the timestamp column is
    kept in ``data_pd`` but is not exposed through ``__getitem__``.

    Attributes:
        data_pd: the raw ``pandas.DataFrame`` as read from ``datapath``.
        users:   int64 tensor of user ids, one per rating.
        items:   int64 tensor of movie ids, one per rating.
        ratings: float32 tensor of ratings, one per rating.
    """

    def __init__(self, datapath):
        """Read ``datapath`` and convert the id/rating columns to tensors.

        Args:
            datapath: path (or any source accepted by ``pandas.read_csv``) of a
                tab-separated ratings file with no header row.
        """
        self.data_pd = pd.read_csv(datapath, sep="\t", names=['user', 'movie', 'rating', 'timestamp'])
        # Go through NumPy with an explicit dtype instead of handing a pandas
        # Series to the legacy LongTensor/FloatTensor constructors.
        # torch.tensor copies, so the tensors do not alias the DataFrame.
        self.items = torch.tensor(self.data_pd['movie'].to_numpy(), dtype=torch.long)
        self.users = torch.tensor(self.data_pd['user'].to_numpy(), dtype=torch.long)
        self.ratings = torch.tensor(self.data_pd['rating'].to_numpy(), dtype=torch.float32)

    def __len__(self):
        """Return the number of ratings in the file."""
        return len(self.ratings)

    def __getitem__(self, idx):
        """Return the ``(user, item, rating)`` tensors stored at position ``idx``."""
        return self.users[idx], self.items[idx], self.ratings[idx]

    def get_datasize(self):
        """Return ``(max user id + 1, max item id + 1, number of ratings)`` as Python ints.

        The ``+ 1`` makes the first two values usable as table sizes when the
        raw ids are used directly as row indices.
        """
        # int(...) unwraps the 0-dim tensors returned by .max() so callers get
        # plain integers rather than tensors.
        n_users = int(self.users.max()) + 1
        n_items = int(self.items.max()) + 1
        return n_users, n_items, len(self.ratings)
    
# One dataset object per pre-split ratings file.
train_data = MovieLensDataset("./datasets/ml-100k/ua.base")
test_data = MovieLensDataset("./datasets/ml-100k/ua.test")

# Mini-batch size shared by both loaders.
batch_size = 128

# Training batches are reshuffled on every pass; test batches keep file order.
train_loader = DataLoader(dataset=train_data, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_data, batch_size=batch_size, shuffle=False)

## Initialization

In [7]:
# Table sizes (max id + 1) and the rating count come from the training split.
n_users, n_items, n_ratings = train_data.get_datasize()
# Only the number of test ratings is needed here. len() returns the same
# len(self.ratings) value as the third element of get_datasize(), and avoids
# binding a top-level `_`, which would shadow IPython's last-output variable
# for the rest of the session.
n_ratings_test = len(test_data)

In [8]:
class MF(nn.Module):
    """Matrix-factorization model: a rating is the dot product of two latent vectors.

    Parameters held by the module:
        U: ``(num_users, rank)`` user factor matrix, drawn from a standard normal.
        V: ``(num_items, rank)`` item factor matrix, drawn from a standard normal.
    """

    def __init__(self, num_users, num_items, rank = 10):
        """Allocate the two factor matrices.

        Args:
            num_users: number of rows in the user factor matrix.
            num_items: number of rows in the item factor matrix.
            rank: length of each latent vector (default 10).
        """
        super().__init__()
        # Draw U before V so the random stream is consumed in a fixed order.
        user_factors = torch.randn(num_users, rank)
        item_factors = torch.randn(num_items, rank)
        self.U = nn.Parameter(user_factors)
        self.V = nn.Parameter(item_factors)

    def forward(self, users, items):
        """Predict one rating per ``(users[k], items[k])`` pair.

        Args:
            users: indices into the rows of ``U`` (assumed to be an integer
                index tensor — confirm against the caller).
            items: indices into the rows of ``V``, paired with ``users``.

        Returns:
            Tensor of element-wise products of the selected rows, summed over
            the last (latent) dimension.
        """
        user_vecs = self.U[users]
        item_vecs = self.V[items]
        return (user_vecs * item_vecs).sum(dim=-1)

In [9]:
# Rank-16 factorization sized from the training split.
mf_model = MF(n_users, n_items, rank=16)

# Adam over both factor matrices, optimizing mean squared rating error.
optimizer = torch.optim.Adam(mf_model.parameters(), lr=0.01)
criterion = nn.MSELoss()

## Training

In [11]:
# Train for a fixed 20 epochs, reporting the per-rating MSE on the training
# and test splits after each one.
for epoch in range(20):
    # ---- training pass ----
    cost = 0
    for users, items, ratings in train_loader:
        optimizer.zero_grad()
        ratings_pred = mf_model(users, items)
        loss = criterion(ratings_pred, ratings)
        loss.backward()
        optimizer.step()
        # criterion gives the batch mean; weight by the batch size so the
        # final division by n_ratings yields a true per-rating average even
        # when the last batch is smaller.
        cost += loss.item() * len(ratings)

    cost /= n_ratings

    print(f"Epoch: {epoch}")
    print("train cost: {:.6f}".format(cost))

    # ---- evaluation pass ----
    cost_test = 0
    # No parameters are updated here, so disable autograd to avoid building
    # a graph for every test batch.
    with torch.no_grad():
        for users, items, ratings in test_loader:
            ratings_pred = mf_model(users, items)
            loss = criterion(ratings_pred, ratings)
            cost_test += loss.item() * len(ratings)

    cost_test /= n_ratings_test
    # "{:.6f}" (precision 6) matches the train line; the previous "{:6f}"
    # was a width spec with the dot missing.
    print("test cost: {:.6f}".format(cost_test))

Epoch: 0
train cost: 6.501411
test cost: 5.556784
Epoch: 1
train cost: 1.603567
test cost: 2.998580
Epoch: 2
train cost: 1.052737
test cost: 2.336104
Epoch: 3
train cost: 0.896339
test cost: 2.051907
Epoch: 4
train cost: 0.833382
test cost: 1.940572
Epoch: 5
train cost: 0.801024
test cost: 1.901787
Epoch: 6
train cost: 0.778794
test cost: 1.853296
Epoch: 7
train cost: 0.756702
test cost: 1.853393
Epoch: 8
train cost: 0.736542
test cost: 1.815162
Epoch: 9
train cost: 0.716472
test cost: 1.778731
Epoch: 10
train cost: 0.696008
test cost: 1.774883
Epoch: 11
train cost: 0.679025
test cost: 1.755267
Epoch: 12
train cost: 0.664997
test cost: 1.761176
Epoch: 13
train cost: 0.655007
test cost: 1.758764
Epoch: 14
train cost: 0.641781
test cost: 1.764417
Epoch: 15
train cost: 0.630595
test cost: 1.774766
Epoch: 16
train cost: 0.627464
test cost: 1.760248
Epoch: 17
train cost: 0.617315
test cost: 1.744669
Epoch: 18
train cost: 0.610447
test cost: 1.758911
Epoch: 19
train cost: 0.606934
test cost: