In [3]:
# Third-party dependencies: pandas reads the CSV tables, torch provides tensors.
import pandas as pd
import torch

# Select the first CUDA GPU when one is available, otherwise fall back to the CPU.
# NOTE(review): `device` is not referenced by any cell visible in this section —
# confirm it is used further down, or remove it.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# NOTE(review): these star imports are presumably where the unqualified names used
# below (CollabDataLoaders, Module, Embedding, Learner, MSELossFlat, sigmoid_range,
# set_seed) come from — explicit imports would make that provenance visible.
from fastai.collab import *
from fastai.tabular.all import *
# Fix the seed before any data splitting or weight initialisation happens.
# Presumably seeds the Python/NumPy/PyTorch generators — confirm against fastai docs.
set_seed(0)

In [4]:
# Read the ratings table from disk (path is relative to the notebook's working
# directory) and preview its first five rows.
ratings = pd.read_csv(filepath_or_buffer='../ml-latest-small/ratings.csv')
ratings.head(n=5)

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


In [5]:
# Read only the two columns needed to map a movieId to a human-readable title,
# then preview the first five rows.
movies = pd.read_csv(
    filepath_or_buffer='../ml-latest-small/movies.csv',
    usecols=['movieId', 'title'],
)
movies.head(n=5)

Unnamed: 0,movieId,title
0,1,Toy Story (1995)
1,2,Jumanji (1995)
2,3,Grumpier Old Men (1995)
3,4,Waiting to Exhale (1995)
4,5,Father of the Bride Part II (1995)


In [6]:
# Attach each rating's movie title by joining on the shared `movieId` key.
#
# The result is assigned back to `ratings`, so the cell must be safe to re-run:
# dropping any `title` column left over from a previous run first prevents a
# second merge from producing `title_x` / `title_y` (which would leave no plain
# `title` column for the later `item_name='title'` lookup).
#
# `validate='many_to_one'` makes pandas raise a MergeError if `movies` ever
# contains a duplicated `movieId`, instead of silently duplicating rating rows.
ratings = (
    ratings
    .drop(columns=['title'], errors='ignore')
    .merge(movies, on='movieId', validate='many_to_one')
)
ratings.head()

Unnamed: 0,userId,movieId,rating,timestamp,title
0,1,1,4.0,964982703,Toy Story (1995)
1,5,1,4.0,847434962,Toy Story (1995)
2,7,1,4.5,1106635946,Toy Story (1995)
3,15,1,2.5,1510577970,Toy Story (1995)
4,17,1,4.5,1305696483,Toy Story (1995)


In [7]:
# Build the collaborative-filtering DataLoaders from the merged frame:
# users are identified by `userId`, items by movie `title`, and the target is
# `rating`. Batches hold 32 rows.
dls = CollabDataLoaders.from_df(
    ratings,
    bs=32,
    user_name='userId',
    item_name='title',
    rating_name='rating',
)
# Display a sample batch as a sanity check on the column mapping.
dls.show_batch()

Unnamed: 0,userId,title,rating
0,307,Signs (2002),3.0
1,437,Rob Roy (1995),3.0
2,77,"Lord of the Rings: The Two Towers, The (2002)",5.0
3,474,Mean Creek (2004),4.5
4,267,Aliens (1986),5.0
5,526,American Hustle (2013),4.0
6,232,"Break-Up, The (2006)",3.0
7,298,Heartbreakers (2001),3.0
8,543,Coyote Ugly (2000),5.0
9,540,Twister (1996),3.5


In [8]:
# Table sizes are taken from the DataLoaders' class lists for the user column
# and the item (title) column respectively.
n_users, n_movies = (len(dls.classes[column]) for column in ('userId', 'title'))
# Width of each latent-factor vector.
n_factors = 5

# Standard-normal factor matrices, one row per user / per movie.
# The user matrix is drawn first so the random stream is consumed in the same order.
# NOTE(review): the model defined below creates its own Embedding tables; these
# two tensors are not read by any cell visible in this section.
user_factors = torch.randn((n_users, n_factors))
movie_factors = torch.randn((n_movies, n_factors))

In [9]:
class DotProductBias(Module):
    """Dot-product collaborative-filtering model with per-user and per-movie bias.

    A prediction is the dot product of a user's factor vector and a movie's
    factor vector, plus that user's bias and that movie's bias, passed through
    `sigmoid_range` with the configured `y_range`.
    """

    def __init__(self, n_users, n_movies, n_factors, y_range=(0,5.5)):
        """Create the four embedding tables and remember the output range.

        Args:
            n_users: number of rows in the user tables.
            n_movies: number of rows in the movie tables.
            n_factors: width of each factor vector.
            y_range: pair unpacked into `sigmoid_range` in `forward`
                (presumably the lower/upper bound of the prediction — confirm
                against fastai's `sigmoid_range`).
        """
        # Tables are created in this fixed order: user factors, user bias,
        # movie factors, movie bias.
        self.user_factors = Embedding(n_users, n_factors)
        self.user_bias = Embedding(n_users, 1)
        self.movie_factors = Embedding(n_movies, n_factors)
        self.movie_bias = Embedding(n_movies, 1)
        self.y_range = y_range

    def forward(self, x):
        """Score a batch of (user, movie) index pairs.

        Args:
            x: indexable as `x[:, 0]` (user indices) and `x[:, 1]` (movie
                indices) — assumes a 2-D integer tensor of shape (batch, 2);
                TODO confirm against the DataLoaders' batch format.

        Returns:
            The result of `sigmoid_range` applied to the biased dot product;
            the pre-activation has one column per row (`keepdim=True`).
        """
        user_idx, movie_idx = x[:,0], x[:,1]
        # Element-wise product summed over the factor axis == row-wise dot product.
        interaction = self.user_factors(user_idx) * self.movie_factors(movie_idx)
        score = interaction.sum(dim=1, keepdim=True)
        bias = self.user_bias(user_idx) + self.movie_bias(movie_idx)
        return sigmoid_range(score + bias, *self.y_range)

In [10]:
# Instantiate the model with the table sizes computed above; `y_range` keeps
# its default.
model = DotProductBias(n_users=n_users, n_movies=n_movies, n_factors=n_factors)
# Wrap data + model in a Learner that optimises mean squared error.
learn = Learner(dls, model, loss_func=MSELossFlat())
# Train for 5 epochs with the one-cycle schedule, peaking at a learning rate of 5e-3.
learn.fit_one_cycle(n_epoch=5, lr_max=5e-3)

epoch,train_loss,valid_loss,time


In [11]:
# Display the model's repr to inspect the embedding tables it contains and their sizes.
model

DotProductBias(
  (user_factors): Embedding(611, 5)
  (user_bias): Embedding(611, 1)
  (movie_factors): Embedding(9720, 5)
  (movie_bias): Embedding(9720, 1)
)