In [1]:
from fastbook import untar_data, URLs, Learner, MSELossFlat
import pandas as pd
import torch
import torch.nn.functional as F

In [2]:
path = untar_data(URLs.ML_100k)

path.ls()

(#23) [Path('/Users/modsoussi/.fastai/data/ml-100k/u.item'),Path('/Users/modsoussi/.fastai/data/ml-100k/u3.test'),Path('/Users/modsoussi/.fastai/data/ml-100k/u1.base'),Path('/Users/modsoussi/.fastai/data/ml-100k/u.info'),Path('/Users/modsoussi/.fastai/data/ml-100k/u2.test'),Path('/Users/modsoussi/.fastai/data/ml-100k/u5.test'),Path('/Users/modsoussi/.fastai/data/ml-100k/u.genre'),Path('/Users/modsoussi/.fastai/data/ml-100k/ub.test'),Path('/Users/modsoussi/.fastai/data/ml-100k/ua.base'),Path('/Users/modsoussi/.fastai/data/ml-100k/u.data')...]

In [3]:
ratings = pd.read_csv(path/'u.data', delimiter='\t', header=None, names=['user', 'movie', 'rating', 'timestamp'])

ratings.head()

Unnamed: 0,user,movie,rating,timestamp
0,196,242,3,881250949
1,186,302,3,891717742
2,22,377,1,878887116
3,244,51,2,880606923
4,166,346,1,886397596


In [4]:
movies = pd.read_csv(path/'u.item', delimiter='|', header=None, names=['movie', 'title'], encoding='latin-1', usecols=(0,1))

movies.head()

Unnamed: 0,movie,title
0,1,Toy Story (1995)
1,2,GoldenEye (1995)
2,3,Four Rooms (1995)
3,4,Get Shorty (1995)
4,5,Copycat (1995)


In [5]:
ratings = ratings.merge(movies)
ratings.head()

Unnamed: 0,user,movie,rating,timestamp,title
0,196,242,3,881250949,Kolya (1996)
1,63,242,3,875747190,Kolya (1996)
2,226,242,5,883888671,Kolya (1996)
3,154,242,3,879138235,Kolya (1996)
4,306,242,5,876503793,Kolya (1996)


In [6]:
movies[movies['title'].duplicated()]

Unnamed: 0,movie,title
267,268,Chasing Amy (1997)
302,303,Ulee's Gold (1997)
347,348,Desperate Measures (1998)
499,500,Fly Away Home (1996)
669,670,Body Snatchers (1993)
679,680,Kull the Conqueror (1997)
864,865,"Ice Storm, The (1997)"
880,881,Money Talks (1997)
1002,1003,That Darn Cat! (1997)
1256,1257,"Designated Mourner, The (1997)"


In [7]:
class CollabBase(torch.nn.Module):
  def __init__(self, n_users, n_items, n_factors=50) -> None:
    super().__init__()
    
    self.user_embs = torch.nn.Embedding(n_users, n_factors)
    self.item_embs = torch.nn.Embedding(n_items, n_factors)
    
  def forward(self, x) -> None:
    users = self.user_embs(x[:, 0])
    items = self.item_embs(x[:, 1])
    
    return (users*items).sum(dim=1, keepdim=True)

In [8]:
n_users = ratings['user'].unique().size + 1
n_movies = movies['movie'].unique().size + 1

n_users, n_movies

(944, 1683)

In [9]:
collab_base_model = CollabBase(n_users, n_movies, n_factors=5)

In [10]:
train_x = torch.tensor(ratings[['user','movie']].to_numpy())
train_y = torch.tensor(ratings['rating'].to_numpy(), dtype=torch.float32).reshape(100000, 1)

In [11]:
train_x[:5]

tensor([[196, 242],
        [ 63, 242],
        [226, 242],
        [154, 242],
        [306, 242]])

In [12]:
train_y[:5]

tensor([[3.],
        [3.],
        [5.],
        [3.],
        [5.]])

In [13]:
preds = collab_base_model(train_x)

preds[:5]

tensor([[-1.3344],
        [ 0.7741],
        [ 1.9214],
        [ 1.7741],
        [ 5.5323]], grad_fn=<SliceBackward0>)

In [14]:
def loss_fn(preds: torch.torch.torch.Tensor, acts: torch.Tensor):
  return ((preds-acts)**2).mean()

In [15]:
loss = loss_fn(preds, train_y)
loss

tensor(18.7232, grad_fn=<MeanBackward0>)

In [16]:
loss.backward()

In [17]:
for p in collab_base_model.parameters():
  print(p.grad)

tensor([[ 0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
        [-0.0073, -0.0022, -0.0078, -0.0069,  0.0067],
        [-0.0014, -0.0011, -0.0014, -0.0008, -0.0006],
        ...,
        [-0.0008,  0.0003,  0.0002,  0.0005, -0.0005],
        [-0.0020,  0.0018, -0.0004, -0.0024, -0.0015],
        [ 0.0127, -0.0021, -0.0049,  0.0041, -0.0030]])
tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
        [ 1.4865e-02,  1.0719e-02, -3.9424e-03, -3.0932e-02,  2.1619e-02],
        [ 8.4124e-04, -4.0043e-04,  1.1712e-03, -4.1002e-03,  5.1003e-04],
        ...,
        [-1.0124e-05, -2.4185e-05,  1.4304e-06,  1.4304e-05, -1.3315e-05],
        [-2.9709e-05, -1.3993e-06,  6.2898e-05,  3.3715e-05,  5.3659e-06],
        [ 6.0848e-05,  5.2527e-05, -1.4601e-04,  3.7800e-05,  4.8249e-06]])


In [18]:
def calculate_norm(model: torch.nn.Module):
  total_norm = 0
  for p in model.parameters():
    p_norm = p.grad.data.norm(2)
    total_norm += p_norm.item()**2

  return total_norm**(1./2)

In [19]:
norm = calculate_norm(collab_base_model)
norm

0.2892694744365496

In [20]:
batch_x = train_x[:64]
batch_y = train_y[:64]

In [21]:
preds = collab_base_model(batch_x)
loss = loss_fn(preds, batch_y)

for p in collab_base_model.parameters():
  p.grad.zero_()
  
loss.backward()

total_norm = calculate_norm(collab_base_model)
total_norm

7.739633367014832

In [22]:
for p in collab_base_model.parameters():
  print(p.grad.data)

tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
        [-8.4278e-04,  3.5593e-01, -4.0464e-01,  1.9867e-01,  1.9077e-01],
        [-3.2935e-04,  1.3909e-01, -1.5813e-01,  7.7637e-02,  7.4551e-02],
        ...,
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00]])
tensor([[0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        ...,
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.]])


In [34]:
import torch.utils.data as data

def train(model: torch.nn.Module, train_x: torch.Tensor, train_y: torch.Tensor, n_epochs=5, lr=.1, loss_fn=F.mse_loss, wd=0.0): 
  if torch.backends.mps.is_available() and torch.backends.mps.is_built():
    model.to('mps')
    x = train_x.to('mps')
    y = train_y.to('mps')
  
  dataset = data.TensorDataset(x,y)
  
  train_size = round(.8 * len(x))
  valid_size = len(x) - train_size
  train_set, validation_set = data.random_split(dataset, [train_size, valid_size])
  
  t_loader = data.DataLoader(train_set, 64, True)
  v_loader = data.DataLoader(validation_set, 64, True)
  
  optimizer = torch.optim.SGD(model.parameters(), lr=lr, weight_decay=wd)
  for i in range(n_epochs):
    model.train()
    t_loss = 0.0
    for xb, yb in t_loader:
      optimizer.zero_grad()
      preds = model(xb)
      loss = loss_fn(preds, yb)
      loss.backward()
      optimizer.step()
      t_loss += loss.item()
    
    t_loss /= len(t_loader)
    
    model.eval()
    v_loss = 0.0
    with torch.no_grad():
      for vbx, vby in v_loader:
        preds = model(vbx)
        loss = loss_fn(preds, vby)
        v_loss += loss.item()
      
    v_loss /= len(v_loader)
    
    print(f"t_loss: {t_loss} - v_loss: {v_loss}")
    
  model.cpu()

In [24]:
test_model = CollabBase(n_users, n_movies, n_factors=50)

train(test_model, train_x, train_y, lr=5e-2, loss_fn=F.mse_loss)

t_loss: 50.466563557434085 - v_loss: 38.444924966976664
t_loss: 26.666370598602295 - v_loss: 29.749442389978768
t_loss: 17.401744898986816 - v_loss: 23.877596681491255
t_loss: 11.715404853439331 - v_loss: 19.513604551077652
t_loss: 8.206440361213684 - v_loss: 16.521111779319593


In [25]:
class CollabBaseWithSigmoid(torch.nn.Module):
  def __init__(self, n_users, n_items, n_factors=50, y_range=(0, 5.5)) -> None:
    super().__init__()
    
    self.user_embs = torch.nn.Parameter(torch.zeros((n_users, n_factors)).normal_(0, .1))
    self.item_embs = torch.nn.Parameter(torch.zeros((n_items, n_factors)).normal_(0, .1))
    self.y_range = y_range
    
  def forward(self, x):
    users = self.user_embs[x[:,0]]
    items = self.item_embs[x[:, 1]]
    
    return F.sigmoid((users*items).sum(dim=1, keepdim=True))*(self.y_range[1]-self.y_range[0])+self.y_range[0]
    

In [26]:
test_model = CollabBaseWithSigmoid(n_users, n_movies)

# train(test_model, train_x=train_x, train_y=train_y, lr=.1)
preds = test_model(train_x)
loss = F.mse_loss(preds, train_y)
loss.backward()

In [27]:
preds[:5]

tensor([[2.6456],
        [2.7745],
        [2.7752],
        [2.6335],
        [2.7593]], grad_fn=<SliceBackward0>)

In [28]:
for p in test_model.parameters():
  print(p.grad)

tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,  0.0000e+00,  0.0000e+00],
        [ 9.7796e-05, -4.2376e-07,  7.7484e-05,  ..., -4.3495e-06, -1.7913e-05,  1.0256e-04],
        [-7.1929e-06, -1.0483e-05, -3.7816e-05,  ..., -3.6810e-05,  1.1849e-05,  2.1148e-05],
        ...,
        [ 1.2515e-05,  2.3932e-05,  2.9909e-05,  ...,  1.9447e-05,  1.5321e-06,  5.3283e-06],
        [ 8.6231e-05,  1.2214e-04,  4.4610e-05,  ...,  2.7586e-05,  3.8811e-05,  6.3183e-05],
        [-1.9674e-05, -1.8895e-05,  7.4104e-05,  ...,  1.0754e-05,  3.4789e-05,  8.6842e-05]])
tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,  0.0000e+00,  0.0000e+00],
        [-3.4450e-05, -1.0641e-04,  1.8842e-04,  ..., -5.7897e-05, -5.4989e-05,  3.6645e-05],
        [-1.5647e-05, -9.0353e-05,  1.7989e-05,  ..., -1.9558e-05,  3.2111e-05, -3.6920e-05],
        ...,
        [ 9.4734e-07, -2.8883e-06,  1.0318e-06,  ..., -1.5823e-07, -4.7661e-07,  2.8141e-06],
        [ 9.5175e-07,  4.0479e-07

In [29]:
test_model = CollabBaseWithSigmoid(n_users, n_movies)

train(test_model, train_x=train_x, train_y=train_y, lr=5e-1, loss_fn=F.mse_loss)

t_loss: 1.8300450501441956 - v_loss: 1.5779079038875934
t_loss: 1.136578863954544 - v_loss: 1.0212789810122773
t_loss: 0.8467411319971084 - v_loss: 0.9377202062180248
t_loss: 0.7149230293035507 - v_loss: 0.9106605769917607
t_loss: 0.6072116461038589 - v_loss: 0.9065401154204299


In [32]:
class CollabWithBias(torch.nn.Module):
  def __init__(self, n_users, n_items, n_factors=50, y_range=(0,5.5)) -> None:
    super().__init__()
    
    self.user_embs = torch.nn.Embedding(n_users, n_factors)
    torch.nn.init.normal_(self.user_embs.weight, 0, .1)
    self.user_bias = torch.nn.Embedding(n_users, 1)
    
    self.item_embs = torch.nn.Embedding(n_items, n_factors)
    torch.nn.init.normal_(self.item_embs.weight, 0, .1)
    self.item_bias = torch.nn.Embedding(n_items, 1)
    
    self.y_range = y_range
    
  def forward(self, x) -> torch.Tensor:
    users = self.user_embs(x[:, 0])
    user_bias = self.user_bias(x[:, 0])
    
    items = self.item_embs(x[:, 1])
    item_bias = self.item_bias(x[:, 1])
    
    interaction = (users * items).sum(dim=1, keepdim=True)
    interaction += user_bias + item_bias
    
    return F.sigmoid(interaction)*(self.y_range[1]-self.y_range[0]) + self.y_range[0]

In [33]:
test_model = CollabWithBias(n_users, n_movies)

train(test_model, train_x=train_x, train_y=train_y, loss_fn=F.mse_loss, lr=5e-1)

t_loss: 1.8699339015960694 - v_loss: 1.23423226534749
t_loss: 1.0436586469173432 - v_loss: 1.0292088225626717
t_loss: 0.8764641754388809 - v_loss: 0.9652712836433143
t_loss: 0.7681681468009949 - v_loss: 0.9390704671795757
t_loss: 0.666668168091774 - v_loss: 0.9307696501287027


In [35]:
movie_bias = test_model.item_bias.weight.squeeze()
movie_bias

tensor([-0.6158,  0.4824,  0.1583,  ...,  1.5118,  0.3861,  0.7259], grad_fn=<SqueezeBackward0>)

In [36]:
idx = movie_bias.argsort()[:5].tolist()

[movies['title'][i-1] for i in idx]

['Coldblooded (1995)',
 'Infinity (1996)',
 'Homage (1995)',
 'Other Voices, Other Rooms (1997)',
 'They Made Me a Criminal (1939)']

In [37]:
test_model(train_x[:5]), train_y[:5]

(tensor([[3.6199],
         [3.5962],
         [4.1350],
         [4.0241],
         [4.3498]], grad_fn=<AddBackward0>),
 tensor([[3.],
         [3.],
         [5.],
         [3.],
         [5.]]))

In [38]:
with torch.no_grad():
  print(F.mse_loss(test_model(train_x[:10]), train_y[:10]))

tensor(0.4140)


In [41]:
test_model = CollabWithBias(n_users, n_movies)

train(test_model, train_x=train_x, train_y=train_y, loss_fn=F.mse_loss, lr=5e-3, n_epochs=5, wd=.1)

t_loss: 3.2584820722579955 - v_loss: 2.687063015687961
t_loss: 2.2850856517791747 - v_loss: 2.0464982491331742
t_loss: 1.883082295703888 - v_loss: 1.8156594369358148
t_loss: 1.7441454754829406 - v_loss: 1.7385347289399218
t_loss: 1.6977210119247437 - v_loss: 1.7115658102705837


In [42]:
test_model(train_x[:5]), train_y[:5]

(tensor([[2.7883],
         [2.7725],
         [2.8252],
         [2.7049],
         [2.6648]], grad_fn=<AddBackward0>),
 tensor([[3.],
         [3.],
         [5.],
         [3.],
         [5.]]))