In [1]:
import os
os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"

from fastai.vision.all import *
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset, Dataset


In [2]:
from fastai.collab import *
from fastai.tabular.all import *
path = untar_data(URLs.ML_100k)

In [3]:
ratings = pd.read_csv(path/'u.data', delimiter='\t', header=None,
                      names=['user', 'movie', 'rating', 'timestamp'])
ratings.head()

Unnamed: 0,user,movie,rating,timestamp
0,196,242,3,881250949
1,186,302,3,891717742
2,22,377,1,878887116
3,244,51,2,880606923
4,166,346,1,886397596


In [4]:
movies = pd.read_csv(path/'u.item', delimiter='|', encoding='latin-1'
                     ,usecols=(0,1), names=('movie', 'title'), header=None)
movies.head()

Unnamed: 0,movie,title
0,1,Toy Story (1995)
1,2,GoldenEye (1995)
2,3,Four Rooms (1995)
3,4,Get Shorty (1995)
4,5,Copycat (1995)


In [5]:
ratings = ratings.merge(movies)
ratings.head()

Unnamed: 0,user,movie,rating,timestamp,title
0,196,242,3,881250949,Kolya (1996)
1,63,242,3,875747190,Kolya (1996)
2,226,242,5,883888671,Kolya (1996)
3,154,242,3,879138235,Kolya (1996)
4,306,242,5,876503793,Kolya (1996)


In [6]:
dls = CollabDataLoaders.from_df(ratings, item_name='title', bs=64)
dls.show_batch()

Unnamed: 0,user,title,rating
0,769,Live Nude Girls (1995),3
1,919,"Fox and the Hound, The (1981)",4
2,305,"Princess Bride, The (1987)",3
3,243,Rudy (1993),3
4,933,Chinatown (1974),4
5,267,Supercop (1992),5
6,678,Twelve Monkeys (1995),4
7,851,Sunset Park (1996),2
8,237,12 Angry Men (1957),4
9,299,"Third Man, The (1949)",4


In [7]:
n_users = len(dls.classes['user'])
n_movies = len(dls.classes['title'])
n_factors=5

In [8]:
user_factors = torch.randn(n_users, n_factors)
movie_factors = torch.randn(n_movies, n_factors)

In [9]:
user_factors[0]
movie_factors[0]

tensor([-2.6920,  1.2717, -2.2588,  1.2614, -0.1179])

the tric for finding the user or movie out of all users and movies is to just multiply their matrices by one hot encoding vector which is 1 at the index of the user or movie we want

In [10]:
one_hot_3 = one_hot(3, n_users).float()


In [11]:
user_factors.t() @ one_hot_3

tensor([ 0.2460, -0.1448, -0.2297, -0.3867, -0.8923])

In [12]:
user_factors[3]

tensor([ 0.2460, -0.1448, -0.2297, -0.3867, -0.8923])

In [13]:
class DotProduct(Module):
       def __init__(self, n_users, n_movies, n_factors, y_range=(0, 5.5)):
              self.user_factors = Embedding(n_users, n_factors)
              self.movie_factors = Embedding(n_movies, n_factors)
              self.y_range = y_range


       def forward(self, x):
              users = self.user_factors(x[:,0])
              movies = self.movie_factors(x[:,1])
              return sigmoid_range((users * movies).sum(dim=1), *self.y_range)

In [14]:
x, y = dls.one_batch()
x.shape, y.shape

(torch.Size([64, 2]), torch.Size([64, 1]))

In [15]:
model = DotProduct(n_users,n_movies, 50)
learn = Learner(dls, model, loss_func=MSELossFlat())

In [16]:
learn.fit_one_cycle(5, 5e-3)

epoch,train_loss,valid_loss,time
0,0.96701,1.000576,00:07
1,0.656378,0.949495,00:06
2,0.461041,0.962326,00:07
3,0.344249,0.971073,00:06
4,0.33446,0.970758,00:06


In [17]:
class DotProduct(Module):
       def __init__(self, n_users, n_movies, n_factors, y_range=(0, 5.5)):
              self.user_factors = Embedding(n_users, n_factors)
              self.user_bias = Embedding(n_users, 1)
              self.movie_factors = Embedding(n_movies, n_factors)
              self.movie_bias = Embedding(n_movies, 1)
              self.y_range = y_range


       def forward(self, x):
              users = self.user_factors(x[:,0])
              movies = self.movie_factors(x[:,1])
              res = (users * movies).sum(dim=1, keepdim=True)
              res += self.user_bias(x[:,0]) + self.movie_bias(x[:,1])
              return sigmoid_range(res, *self.y_range)

In [18]:
model = DotProduct(n_users,n_movies, 50)
learn = Learner(dls, model, loss_func=MSELossFlat())
learn.fit_one_cycle(5, 5e-3)

epoch,train_loss,valid_loss,time
0,0.868177,0.953846,00:08
1,0.579467,0.92258,00:07
2,0.405349,0.947315,00:08
3,0.313669,0.955286,00:08
4,0.313394,0.956333,00:07


In [19]:
layer = nn.Linear(3, 2)   # 3 inputs, 2 outputs
list(layer.parameters())


[Parameter containing:
 tensor([[-0.1150, -0.3953,  0.4092],
         [ 0.4080, -0.0909,  0.3471]], requires_grad=True),
 Parameter containing:
 tensor([0.1162, 0.4940], requires_grad=True)]

5. What are we doing in your screenshot?

The book is teaching you how to build your own layer from scratch.

They first show that if you just stick a torch.Tensor in a Module, it won’t show up in .parameters().

That’s why optimizers won’t update it.

Then, they’ll explain how to wrap it with nn.Parameter, so PyTorch knows to treat it as a learnable weight.

In [20]:
class T(Module):
       def __init__(self):
              self.a = nn.Linear(1, 3, bias=False)

list(T().parameters())

[Parameter containing:
 tensor([[0.6643],
         [0.7452],
         [0.9157]], requires_grad=True)]

In [21]:
type(T().a.weight)


torch.nn.parameter.Parameter

In [22]:
def create_params(size):
       return nn.Parameter(torch.zeros(*size).normal_(0, 0.01))

In [23]:
class DotProductBias(Module):
       def __init__(self, n_users, n_movies, n_factors, y_range=(0, 5.5)):
              self.user_factors = create_params([n_users, n_factors])
              self.user_bias = create_params([n_users])
              self.movie_factors = create_params([n_movies, n_factors])
              self.movie_bias = create_params([n_movies])
              self.y_range = y_range


       def forward(self, x):
              users = self.user_factors[x[:,0]]
              movies = self.movie_factors[x[:,1]]
              res = (users * movies).sum(dim=1)
              res += self.user_bias[x[:,0]] + self.movie_bias[x[:,1]]
              return sigmoid_range(res, *self.y_range)

In [24]:
model = DotProductBias(n_users,n_movies, 50)
learn = Learner(dls, model, loss_func=MSELossFlat())
learn.fit_one_cycle(5, 5e-3, wd=0.1)

epoch,train_loss,valid_loss,time
0,0.870008,0.952631,00:08
1,0.662209,0.896736,00:07
2,0.518685,0.877492,00:07
3,0.438259,0.862042,00:08
4,0.432716,0.857693,00:07


.argsort() doesn’t sort the values themselves — it returns the indices that would sort the tensor.

In [25]:
movie_bias = learn.model.movie_bias
idxs = movie_bias.argsort()[:5]
[dls.classes['title'][i]for i in idxs]

['Children of the Corn: The Gathering (1996)',
 'Lawnmower Man 2: Beyond Cyberspace (1996)',
 'Island of Dr. Moreau, The (1996)',
 'Amityville II: The Possession (1982)',
 'Robocop 3 (1993)']

In [26]:
movie_bias   


Parameter containing:
tensor([ 0.0026, -0.1017,  0.0164,  ...,  0.0312,  0.0931,  0.0276],
       device='mps:0', requires_grad=True)

In [27]:
idxs = movie_bias.argsort(descending=True)[:5]
[dls.classes['title'][i]for i in idxs]

['As Good As It Gets (1997)',
 "Schindler's List (1993)",
 'L.A. Confidential (1997)',
 'Usual Suspects, The (1995)',
 'Shawshank Redemption, The (1994)']

In [28]:
learn = collab_learner(dls, n_factors=50, y_range=(0, 5.5))
learn.fit_one_cycle(5, 5e-3, wd=0.1)

epoch,train_loss,valid_loss,time
0,0.919825,0.962692,00:08
1,0.664201,0.913296,00:09
2,0.521963,0.886505,00:08
3,0.45845,0.870482,00:07
4,0.440088,0.867486,00:07


In [29]:
learn.model

EmbeddingDotBias(
  (u_weight): Embedding(944, 50)
  (i_weight): Embedding(1665, 50)
  (u_bias): Embedding(944, 1)
  (i_bias): Embedding(1665, 1)
)

In [30]:
movie_bias = learn.model.i_bias.weight.squeeze()
idxs = movie_bias.argsort(descending=True)[:5]
[dls.classes['title'][i]for i in idxs]

['Titanic (1997)',
 "Schindler's List (1993)",
 'Good Will Hunting (1997)',
 'L.A. Confidential (1997)',
 'As Good As It Gets (1997)']

In [31]:
movie_factors = learn.model.i_weight.weight

In [32]:
movie_factors.shape

torch.Size([1665, 50])

In [33]:
idx = dls.classes['title'].o2i['Star Wars (1977)']
distances = nn.CosineSimilarity(dim=1)(movie_factors, movie_factors[idx][None])


In [34]:
distances.shape

torch.Size([1665])

What is inside dls.classes?

Each categorical column in your dataset gets an entry in dls.classes.

The key is the column name (like 'userId', 'title', 'genre' etc.).

The value is a list of all categories for that column, in the same order that the model uses.

So depending on how your dls was built, you could write things like:

dls.classes['userId']   # list of all users
dls.classes['title']    # list of all movies
dls.classes['genre']    # list of genres (if genre was categorical)


In [35]:
idx = distances.argsort(descending=True)[1]
dls.classes['title'][idx]

'Return of the Jedi (1983)'

In [36]:
dls.classes.get(2)


In [37]:
embs = get_emb_sz(dls)
embs

[(944, 74), (1665, 102)]

In [42]:
class collabNN(Module):
       def __init__(self, user_sz, item_sz, y_range=(0,5.5), n_act=100):
              self.user_factors = Embedding(*user_sz)
              self.item_factors = Embedding(*item_sz)
              self.layers = nn.Sequential(
                     nn.Linear(user_sz[1]+item_sz[1], n_act),
                     nn.ReLU(),
                     nn.Linear(n_act, 1))
              self.y_range = y_range


       def forward(self, x):
              embs = self.user_factors(x[:,0]), self.item_factors(x[:,1])
              x = self.layers(torch.cat(embs, dim=1))
              return sigmoid_range(x, *self.y_range)
              

In [43]:
model = collabNN(*embs)

In [46]:
learn = Learner(dls, model, loss_func = MSELossFlat())
learn.fit_one_cycle(5, 5e-3, wd=0.1)

epoch,train_loss,valid_loss,time
0,0.919232,0.977765,00:10
1,0.860224,0.949502,00:10
2,0.826421,0.90959,00:09
3,0.772648,0.886222,00:09
4,0.796232,0.874989,00:10


In [47]:
learn = collab_learner(dls, use_nn =True, y_range=(0, 5.5), layers=[100,50])
learn.fit_one_cycle(5, 5e-3, wd=0.1)

epoch,train_loss,valid_loss,time
0,0.980299,0.997496,00:14
1,0.906129,0.933708,00:12
2,0.827551,0.91246,00:12
3,0.800535,0.877614,00:12
4,0.73266,0.871324,00:12
