In [1]:
from pathlib import Path
import pandas as pd
import numpy as np

####  a recommender system on the MovieLens dataset

In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset
from torch.utils.data import DataLoader

from tqdm import tqdm


In [3]:
! head data/ml-100k/u.data

196	242	3	881250949
186	302	3	891717742
22	377	1	878887116
244	51	2	880606923
166	346	1	886397596
298	474	4	884182806
115	265	2	881171488
253	465	5	891628467
305	451	3	886324817
6	86	3	883603013


In [4]:
# import the ml100k dataset
df = pd.read_csv('data/ml-100k/u.data', sep='\t', header=None, names=['user_id', 'item_id', 'rating', 'timestamp'])
df.head()

Unnamed: 0,user_id,item_id,rating,timestamp
0,196,242,3,881250949
1,186,302,3,891717742
2,22,377,1,878887116
3,244,51,2,880606923
4,166,346,1,886397596


In [5]:
# count the number of unique users and items
print(df.user_id.unique().shape[0]), print(df.item_id.unique().shape[0])

943
1682


(None, None)

In [6]:
# number of ratings
print(df.shape[0])

100000


In [7]:
# split the data into train and test sets
train, test = train_test_split(df, test_size=0.2, random_state=42)

In [8]:
# instead, lets create a function which creates a dictionary of user ids and item ids
def create_user_item_dict(df):
    user_ids = df.user_id.unique().tolist()
    item_ids = df.item_id.unique().tolist()
    user2idx = {o:i for i,o in enumerate(user_ids)}
    item2idx = {o:i for i,o in enumerate(item_ids)}
    return user2idx, item2idx

In [9]:
# get the user and item dictionaries
user2idx, item2idx = create_user_item_dict(train)

In [10]:
train.head()

Unnamed: 0,user_id,item_id,rating,timestamp
75220,807,1411,1,893082619
48955,474,659,5,887925187
44966,463,268,4,877384940
13568,139,286,4,879537844
92727,621,751,4,883799651


In [11]:
# create a dataset class that does the encoding too
class MovieLensDataset(Dataset):
    def __init__(self, df, user2idx, item2idx):
        self.users = df.user_id.values
        self.items = df.item_id.values
        self.ratings = df.rating.values
        self.user2idx = user2idx
        self.item2idx = item2idx
        
    def __len__(self):
        return len(self.users)
    
    def __getitem__(self, idx):
        user = self.user2idx.get(self.users[idx], -1)
        item = self.item2idx.get(self.items[idx], -1)
        rating = self.ratings[idx]
        return user, item, rating

In [12]:
# initialize the dataset
train_ds = MovieLensDataset(train, user2idx, item2idx)
test_ds = MovieLensDataset(test, user2idx, item2idx)

In [13]:
#check the dataset class
train_ds[0]


(0, 0, 1)

In [14]:

# create a dataloader

train_dl = DataLoader(train_ds, batch_size=1024, shuffle=True)
test_dl = DataLoader(test_ds, batch_size=1024, shuffle=False)


In [15]:
next(iter(train_dl))

[tensor([316, 257,  99,  ..., 207, 182, 203]),
 tensor([100, 694, 201,  ...,  12, 616, 404]),
 tensor([3, 1, 4,  ..., 2, 2, 5])]

In [16]:
next(iter(test_dl))

[tensor([360, 144, 150,  ..., 157, 162, 221]),
 tensor([ 347, 1015,  328,  ..., 1266, 1297,   77]),
 tensor([4, 3, 4,  ..., 2, 5, 3])]

In [17]:
#device = torch.device("cuda" if torch.cuda.is_available() else "mps" if torch.has_mps else "cpu")
device = torch.device("cpu")
print(device)

cpu


### i. Trying Matrix Factorization model
- Here Embedding layer is used to learn the latent factors
- GMF model without biases

In [18]:
# create a model class
class MatrixFactorization(nn.Module):
    def __init__(self, n_users, n_items, n_factors=40):
        super().__init__()
        self.user_factors = nn.Embedding(n_users, n_factors)
        self.item_factors = nn.Embedding(n_items, n_factors)
        #initialize the embeddings, this is important. Else, model can struggle to learn
        nn.init.normal_(self.user_factors.weight, std=0.01)
        nn.init.normal_(self.item_factors.weight, std=0.01)
        
    def forward(self, user, item):
        return (self.user_factors(user) * self.item_factors(item)).sum(1)


# create a model class with bias
class MatrixFactorizationBias(nn.Module):
    def __init__(self, n_users, n_items, n_factors=40):
        super().__init__()
        self.user_factors = nn.Embedding(n_users, n_factors)
        self.item_factors = nn.Embedding(n_items, n_factors)
        self.user_bias = nn.Embedding(n_users, 1)
        self.item_bias = nn.Embedding(n_items, 1)
        #initialize the embeddings, this is important. Else, model can struggle to learn
        nn.init.normal_(self.user_factors.weight, std=0.01)
        nn.init.normal_(self.item_factors.weight, std=0.01)
        nn.init.normal_(self.user_bias.weight, std=0.01)
        nn.init.normal_(self.item_bias.weight, std=0.01)
        
    def forward(self, user, item):
        return (self.user_factors(user) * self.item_factors(item)).sum(1) + self.user_bias(user).squeeze() + self.item_bias(item).squeeze()

In [19]:

# initialize the model
model = MatrixFactorizationBias(len(user2idx), len(item2idx), n_factors=60).to(device)

# define the loss function. We will use mean squared error because we are predicting ratings
# if we were predicting a binary outcome, we would use binary cross entropy
criterion = nn.MSELoss()

# define the optimizer
optimizer = torch.optim.SGD(model.parameters(), lr=1e-2)

# define the number of epochs
epochs = 10

In [20]:
# no of batches
len(train_dl)


79

In [21]:
# train the model.

for epoch in range(epochs):
    running_loss = 0.0
    train_tqdm = tqdm(train_dl)
    model.train()
    for i, data in enumerate(train_tqdm):
        users, items, ratings = data
        users = users.long().to(device)
        items = items.long().to(device)
        ratings = ratings.float().to(device)
        
        # zero the parameter gradients
        optimizer.zero_grad()
        
        outputs = model(users, items)
        loss = criterion(outputs, ratings)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
        train_tqdm.set_description(f"Epoch {epoch}, Loss: {loss.item():.4f}")

Epoch 0, Loss: 14.0585: 100%|██████████| 79/79 [00:00<00:00, 206.51it/s]
Epoch 1, Loss: 12.7910: 100%|██████████| 79/79 [00:00<00:00, 194.36it/s]
Epoch 2, Loss: 12.7606: 100%|██████████| 79/79 [00:00<00:00, 217.65it/s]
Epoch 3, Loss: 12.3909: 100%|██████████| 79/79 [00:00<00:00, 214.03it/s]
Epoch 4, Loss: 14.2272: 100%|██████████| 79/79 [00:00<00:00, 187.84it/s]
Epoch 5, Loss: 12.1089: 100%|██████████| 79/79 [00:00<00:00, 221.73it/s]
Epoch 6, Loss: 13.3881: 100%|██████████| 79/79 [00:00<00:00, 207.65it/s]
Epoch 7, Loss: 11.7460: 100%|██████████| 79/79 [00:00<00:00, 234.60it/s]
Epoch 8, Loss: 11.6867: 100%|██████████| 79/79 [00:00<00:00, 189.13it/s]
Epoch 9, Loss: 11.8292: 100%|██████████| 79/79 [00:00<00:00, 178.98it/s]


In [23]:
device = torch.device("cuda" if torch.cuda.is_available() else "mps" if torch.has_mps else "cpu")
#device = torch.device("cpu")
print(device)

mps


In [24]:
model = model.to(device)

In [25]:
# predict the ratings for the test set
y_true = []
y_pred = []
for i, data in enumerate(test_dl):
    model.eval()
    users, items, ratings = data
    users = users.long().to(device)
    items = items.long().to(device)
    ratings = ratings.float().to(device)
    
    outputs = model(users, items)
    y_true.extend(ratings.cpu().detach().numpy())
    y_pred.extend(outputs.cpu().detach().numpy())

In [26]:
# calculate the mean squared error
from sklearn.metrics import mean_squared_error
mean_squared_error(y_true, y_pred)


12.287557

### ii. Trying Neural collaborative filtering model
- Variant 1 with MLP only

In [27]:
#device = torch.device("cuda" if torch.cuda.is_available() else "mps" if torch.has_mps else "cpu")
device = torch.device("cpu")
print(device)

cpu


In [28]:
### neural collaborative filtering model

class NCF(nn.Module):
    def __init__(self, n_users, n_items, n_factors=40):
        super().__init__()
        self.user_factors = nn.Embedding(n_users, n_factors)
        self.item_factors = nn.Embedding(n_items, n_factors)
        #initialize the embeddings, this is important. Else, model can struggle to learn
        nn.init.normal_(self.user_factors.weight, std=0.01)
        nn.init.normal_(self.item_factors.weight, std=0.01)
        
        self.user_bias = nn.Embedding(n_users, 1)
        self.item_bias = nn.Embedding(n_items, 1)
        nn.init.constant_(self.user_bias.weight, 0)
        nn.init.constant_(self.item_bias.weight, 0)
        
        self.fc1 = nn.Linear(2*n_factors, 10) # 2*n_factors because we are concatenating user and item embeddings
        self.fc2 = nn.Linear(10, 1) # 1 because we are predicting a single value
        
        
    def forward(self, user, item):
        user_vec = self.user_factors(user)
        item_vec = self.item_factors(item)
        user_bias = self.user_bias(user)
        item_bias = self.item_bias(item)
        
        x = torch.cat([user_vec, item_vec], dim=1) # concatenate user and item embeddings. Dimensions are batch_size x (2*n_factors)
        x = F.relu(self.fc1(x)) # apply a non-linear activation function. Dimensions are batch_size x 20
        x = self.fc2(x) # Dimensions are batch_size x 1
        x = x + user_bias + item_bias # add the bias terms, Dimensions are batch_size x 1
        return x.squeeze(1) # squeeze the output to get a single value

In [29]:
len(user2idx), len(item2idx)

(943, 1653)

In [30]:

# initialize the model
model = NCF(len(user2idx), len(item2idx), n_factors=5)

criterion = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-2)

epochs = 10

In [31]:
# no of batches
len(train_dl)

79

In [32]:
# no of model parameters
sum(p.numel() for p in model.parameters() if p.requires_grad)

15697

In [33]:
for epoch in range(epochs):
    model.train()
    running_loss = 0.0
    train_tqdm = tqdm(train_dl)
    for i, data in enumerate(train_tqdm):
        users, items, ratings = data
        #print(users.shape, items.shape, ratings.shape)
        users = users.long()
        items = items.long()
        ratings = ratings.float()
        
        # zero the parameter gradients
        optimizer.zero_grad()
        
        outputs = model(users, items)
        loss = criterion(outputs, ratings)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
        train_tqdm.set_description(f"Epoch {epoch}, Loss: {loss.item():.4f}")

Epoch 0, Loss: 1.2550: 100%|██████████| 79/79 [00:00<00:00, 187.59it/s]
Epoch 1, Loss: 1.5168: 100%|██████████| 79/79 [00:00<00:00, 211.83it/s]
Epoch 2, Loss: 1.0841: 100%|██████████| 79/79 [00:00<00:00, 218.82it/s]
Epoch 3, Loss: 1.1753: 100%|██████████| 79/79 [00:00<00:00, 218.06it/s]
Epoch 4, Loss: 1.2825: 100%|██████████| 79/79 [00:00<00:00, 221.89it/s]
Epoch 5, Loss: 1.4951: 100%|██████████| 79/79 [00:00<00:00, 191.82it/s]
Epoch 6, Loss: 1.2863: 100%|██████████| 79/79 [00:00<00:00, 214.85it/s]
Epoch 7, Loss: 1.0250: 100%|██████████| 79/79 [00:00<00:00, 217.81it/s]
Epoch 8, Loss: 1.3183: 100%|██████████| 79/79 [00:00<00:00, 220.65it/s]
Epoch 9, Loss: 1.3804: 100%|██████████| 79/79 [00:00<00:00, 210.62it/s]


In [34]:
device = torch.device("cuda" if torch.cuda.is_available() else "mps" if torch.has_mps else "cpu")
#device = torch.device("cpu")
print(device)

mps


In [35]:
model = model.to(device)

In [36]:
# predict the ratings for the test set
y_true = []
y_pred = []
for i, data in enumerate(test_dl):
    model.eval()
    users, items, ratings = data
    users = users.long().to(device)
    items = items.long().to(device)
    ratings = ratings.float().to(device)
    
    outputs = model(users, items)
    y_true.extend(ratings.cpu().detach().numpy())
    y_pred.extend(outputs.cpu().detach().numpy())

In [37]:
# calculate the mean squared error
from sklearn.metrics import mean_squared_error
mean_squared_error(y_true, y_pred)


1.231655

iii. Nueral collaborative filtering model
- Variant 2 with MLP and GMF


Here , NCF adopts two pathways to model users and items: 1) element-wise product of vectors, 2) concatenation of vectors. To learn interactions after concatenating of users and items latent features, the standard MLP model is applied. We allow GMF and MLP to learn separate embeddings

In [38]:
#### MLP and GMF in NCF
# create a model class
class GMF(nn.Module):
    def __init__(self, n_users, n_items, n_factors=40):
        super().__init__()
        self.user_factors = nn.Embedding(n_users, n_factors)
        self.item_factors = nn.Embedding(n_items, n_factors)
        #initialize the embeddings, this is important. Else, model can struggle to learn
        nn.init.normal_(self.user_factors.weight, std=0.01)
        nn.init.normal_(self.item_factors.weight, std=0.01)
        
    def forward(self, user, item):
        return (self.user_factors(user) * self.item_factors(item)).sum(1)
        
class MLP(nn.Module):
    def __init__(self, n_users, n_items, n_factors=40):
        super().__init__()
        self.user_factors = nn.Embedding(n_users, n_factors)
        self.item_factors = nn.Embedding(n_items, n_factors)
        #initialize the embeddings, this is important. Else, model can struggle to learn
        nn.init.normal_(self.user_factors.weight, std=0.01)
        nn.init.normal_(self.item_factors.weight, std=0.01)
        
        self.user_bias = nn.Embedding(n_users, 1)
        self.item_bias = nn.Embedding(n_items, 1)
        nn.init.constant_(self.user_bias.weight, 0)
        nn.init.constant_(self.item_bias.weight, 0)
        
        self.fc1 = nn.Linear(2*n_factors, 10) # 2*n_factors because we are concatenating user and item embeddings
        self.fc2 = nn.Linear(10, 1) # 1 because we are predicting a single value
        
        
    def forward(self, user, item):
        user_vec = self.user_factors(user)
        item_vec = self.item_factors(item)
        user_bias = self.user_bias(user)
        item_bias = self.item_bias(item)
        
        x = torch.cat([user_vec, item_vec], dim=1) # concatenate user and item embeddings. Dimensions are batch_size x (2*n_factors)
        x = F.relu(self.fc1(x)) # apply a non-linear activation function. Dimensions are batch_size x 20
        x = self.fc2(x) # Dimensions are batch_size x 1
        x = x + user_bias + item_bias # add the bias terms, Dimensions are batch_size x 1
        return x.squeeze(1) # squeeze the output to get a single value


class NCF(nn.Module):
    def __init__(self, n_users, n_items, n_factors=40):
        super().__init__()
        self.gmf = GMF(n_users, n_items, n_factors)
        self.mlp = MLP(n_users, n_items, n_factors)
        self.fc = nn.Linear(2*n_factors, 1) # 2*n_factors because we are concatenating the output of GMF and MLP
        nn.init.kaiming_uniform_(self.fc.weight) # initialize the weights of the final layer
        
    def forward(self, user, item):
        gmf = self.gmf(user, item)
        mlp = self.mlp(user, item)
        x = torch.cat([gmf, mlp], dim=1) # concatenate the output of GMF and MLP. Dimensions are batch_size x (2*n_factors)
        x = self.fc(x) # Dimensions are batch_size x 1
        return x.squeeze(1) # squeeze the output to get a single value

In [49]:
n_users = len(user2idx)
n_items = len(item2idx)

In [50]:
model = NCF(n_users, n_items, n_factors=40)

criterion = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-2)

epochs = 10

In [51]:
# no of batches
len(train_dl)

79

In [52]:
# number of parameters
sum(p.numel() for p in model.parameters() if p.requires_grad)



107257

In [53]:
for epoch in range(epochs):
    model.train()
    running_loss = 0.0
    train_tqdm = tqdm(train_dl)
    for i, data in enumerate(train_tqdm):
        users, items, ratings = data
        #print(users.shape, items.shape, ratings.shape)
        users = users.long()
        items = items.long()
        ratings = ratings.float()
        
        # zero the parameter gradients
        optimizer.zero_grad()
        
        outputs = model(users, items)
        loss = criterion(outputs, ratings)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
        train_tqdm.set_description(f"Epoch {epoch}, Loss: {loss.item():.4f}")

Epoch 0, Loss: 1.5174: 100%|██████████| 79/79 [00:00<00:00, 164.62it/s]
Epoch 1, Loss: 1.3356: 100%|██████████| 79/79 [00:00<00:00, 185.45it/s]
Epoch 2, Loss: 1.5388: 100%|██████████| 79/79 [00:00<00:00, 190.42it/s]
Epoch 3, Loss: 1.6575: 100%|██████████| 79/79 [00:00<00:00, 188.15it/s]
Epoch 4, Loss: 1.1721: 100%|██████████| 79/79 [00:00<00:00, 186.02it/s]
Epoch 5, Loss: 1.3692: 100%|██████████| 79/79 [00:00<00:00, 189.61it/s]
Epoch 6, Loss: 1.5626: 100%|██████████| 79/79 [00:00<00:00, 168.10it/s]
Epoch 7, Loss: 1.2125: 100%|██████████| 79/79 [00:00<00:00, 175.39it/s]
Epoch 8, Loss: 1.1858: 100%|██████████| 79/79 [00:00<00:00, 185.53it/s]
Epoch 9, Loss: 1.1830: 100%|██████████| 79/79 [00:00<00:00, 181.59it/s]
