In [1]:
from fastbook import *

In [2]:
path = untar_data(URLs.ML_100k)

path.ls()

(#23) [Path('/Users/modsoussi/.fastai/data/ml-100k/u.item'),Path('/Users/modsoussi/.fastai/data/ml-100k/u3.test'),Path('/Users/modsoussi/.fastai/data/ml-100k/u1.base'),Path('/Users/modsoussi/.fastai/data/ml-100k/u.info'),Path('/Users/modsoussi/.fastai/data/ml-100k/u2.test'),Path('/Users/modsoussi/.fastai/data/ml-100k/u5.test'),Path('/Users/modsoussi/.fastai/data/ml-100k/u.genre'),Path('/Users/modsoussi/.fastai/data/ml-100k/ub.test'),Path('/Users/modsoussi/.fastai/data/ml-100k/ua.base'),Path('/Users/modsoussi/.fastai/data/ml-100k/u.data')...]

In [3]:
ratings = pd.read_csv(path/'u.data', delimiter='\t', header=None, names=['user', 'movie', 'rating', 'timestamp'])

ratings.head()

Unnamed: 0,user,movie,rating,timestamp
0,196,242,3,881250949
1,186,302,3,891717742
2,22,377,1,878887116
3,244,51,2,880606923
4,166,346,1,886397596


In [4]:
movies = pd.read_csv(path/'u.item', delimiter='|', header=None, names=['movie', 'title'], encoding='latin-1', usecols=(0,1))

movies.head()

Unnamed: 0,movie,title
0,1,Toy Story (1995)
1,2,GoldenEye (1995)
2,3,Four Rooms (1995)
3,4,Get Shorty (1995)
4,5,Copycat (1995)


In [5]:
ratings = ratings.merge(movies)

ratings.head()

Unnamed: 0,user,movie,rating,timestamp,title
0,196,242,3,881250949,Kolya (1996)
1,63,242,3,875747190,Kolya (1996)
2,226,242,5,883888671,Kolya (1996)
3,154,242,3,879138235,Kolya (1996)
4,306,242,5,876503793,Kolya (1996)


In [6]:
class CollabBase(torch.nn.Module):
  def __init__(self, n_users, n_items, n_factors=50) -> None:
    super().__init__()
    
    self.user_embs = torch.nn.Embedding(n_users, n_factors)
    self.item_embs = torch.nn.Embedding(n_items, n_factors)
    
  def forward(self, x) -> None:
    users = self.user_embs(x[:, 0])
    items = self.item_embs(x[:, 1])
    
    return (users*items).sum(dim=1)

In [7]:
n_users = ratings['user'].unique().size
n_movies = movies.size

n_users, n_movies

(943, 3364)

In [8]:
collab_base_model = CollabBase(n_users, n_movies, n_factors=10)

In [9]:
train_x = torch.tensor(ratings[['user','movie']].to_numpy(), device='mps')
train_y = torch.tensor(ratings['rating'].to_numpy(), device='mps')

collab_base_model.to(torch.device('mps'))

CollabBase(
  (user_embs): Embedding(943, 10)
  (item_embs): Embedding(3364, 10)
)

In [10]:
preds = collab_base_model(train_x)

preds[:5]

tensor([ 4.1102, -0.3084, -2.8220,  1.9269, -4.0297], device='mps:0', grad_fn=<SliceBackward0>)

In [11]:
train_y[:5]

tensor([3, 3, 5, 3, 5], device='mps:0')

In [12]:
def loss_fn(preds: Tensor, acts: Tensor):
  return ((preds-acts)**2).mean()

In [13]:
loss_fn(preds, train_y).backward()

In [14]:
for p in collab_base_model.parameters():
  print(p.grad)

tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,  0.0000e+00,  0.0000e+00],
        [-1.0574e-04, -8.0882e-03, -3.4599e-03,  ..., -2.2741e-03, -2.9881e-03, -4.0471e-05],
        [-1.5026e-03,  1.3557e-03,  7.3217e-04,  ...,  1.4020e-03, -8.1576e-04, -2.6260e-03],
        ...,
        [ 2.5674e-05, -4.1313e-04, -4.9655e-03,  ..., -2.9176e-03,  1.7902e-03, -4.7898e-03],
        [ 1.9400e-05, -3.5011e-05,  2.3021e-04,  ..., -1.1853e-03,  1.5976e-03,  1.0368e-03],
        [ 1.1558e-04, -2.1474e-05, -1.1468e-03,  ...,  1.6367e-03,  5.9288e-04, -8.9711e-06]], device='mps:0')
tensor([[ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [ 0.0039,  0.0009, -0.0020,  ...,  0.0020,  0.0016, -0.0031],
        [-0.0015,  0.0002,  0.0004,  ..., -0.0005,  0.0030, -0.0037],
        ...,
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  ...,  