# SVD

In [39]:
import pandas as pd
import numpy as np
from scipy.sparse.linalg import svds

# Load MovieLens dataset
df = pd.read_csv("./ml-latest-small/ratings.csv")
df['cat_movie'] = df['movieId'].astype("category").cat.codes

num_users = df['userId'].nunique()
num_items = df['movieId'].nunique()

# Create user-item rating matrix
user_item_matrix = df.pivot(index='userId', columns='cat_movie', values='rating').fillna(0)

# Convert to NumPy array
ratings_matrix = user_item_matrix.to_numpy()
ratings_matrix


array([[4. , 0. , 4. , ..., 0. , 0. , 0. ],
       [0. , 0. , 0. , ..., 0. , 0. , 0. ],
       [0. , 0. , 0. , ..., 0. , 0. , 0. ],
       ...,
       [2.5, 2. , 2. , ..., 0. , 0. , 0. ],
       [3. , 0. , 0. , ..., 0. , 0. , 0. ],
       [5. , 0. , 0. , ..., 0. , 0. , 0. ]], shape=(610, 9724))

In [40]:
# Compute SVD (k=50 latent features)
k = 50
U, sigma, Vt = svds(ratings_matrix, k=k)

# Convert singular values to diagonal matrix
sigma = np.diag(sigma)

# Reconstruct approximate ratings matrix
predicted_ratings = np.dot(np.dot(U, sigma), Vt)
predicted_ratings

array([[ 2.18187197e+00,  3.93674189e-01,  8.38185756e-01, ...,
        -2.49842711e-02, -2.49842711e-02, -5.89881001e-02],
       [ 2.09809067e-01,  4.82051887e-03,  3.07424005e-02, ...,
         1.88951263e-02,  1.88951263e-02,  3.19658766e-02],
       [ 1.33940814e-02,  3.47258164e-02,  5.05247472e-02, ...,
        -1.61232411e-03, -1.61232411e-03, -5.29984436e-04],
       ...,
       [ 2.30963539e+00,  2.70243898e+00,  2.26419696e+00, ...,
        -1.25165145e-02, -1.25165145e-02,  9.27520866e-02],
       [ 7.83182598e-01,  5.30142683e-01,  9.79748203e-02, ...,
         9.84577917e-04,  9.84577917e-04, -5.49383653e-03],
       [ 5.35809290e+00, -2.88817350e-01, -9.07680249e-02, ...,
        -2.79227416e-02, -2.79227416e-02,  3.55476113e-02]],
      shape=(610, 9724))

In [41]:
def recommend_for_user(user_id, predicted_ratings, num_recommendations=5):
    user_row = user_id - 1  # Adjust for zero-based indexing
    sorted_movies = np.argsort(predicted_ratings[user_row])[::-1]  # Sort by predicted rating
    return sorted_movies[:num_recommendations]

# Example recommendation
user_id = 10
recommended_movies = recommend_for_user(user_id, predicted_ratings)
print(f"Recommended movies for user {user_id}: {recommended_movies}")

Recommended movies for user 10: [4131 4791 3633  314 4354]


# NMF Matrix factorization

In [44]:
import pandas as pd
import numpy as np
from sklearn.decomposition import NMF

# Load MovieLens dataset
df = pd.read_csv("./ml-latest-small/ratings.csv")
df['cat_movie'] = df['movieId'].astype("category").cat.codes

num_users = df['userId'].nunique()
num_items = df['movieId'].nunique()

# Create user-item rating matrix
user_item_matrix = df.pivot(index='userId', columns='cat_movie', values='rating').fillna(0)

# Convert to NumPy array
ratings_matrix = user_item_matrix.to_numpy()
ratings_matrix

array([[4. , 0. , 4. , ..., 0. , 0. , 0. ],
       [0. , 0. , 0. , ..., 0. , 0. , 0. ],
       [0. , 0. , 0. , ..., 0. , 0. , 0. ],
       ...,
       [2.5, 2. , 2. , ..., 0. , 0. , 0. ],
       [3. , 0. , 0. , ..., 0. , 0. , 0. ],
       [5. , 0. , 0. , ..., 0. , 0. , 0. ]], shape=(610, 9724))

In [45]:
# Apply NMF (k=50 latent features)
k = 50
nmf_model = NMF(n_components=k, init='random', random_state=42, max_iter=500)
W = nmf_model.fit_transform(ratings_matrix)
H = nmf_model.components_

# Reconstruct predicted ratings
predicted_ratings = np.dot(W, H)

In [46]:
def recommend_for_user(user_id, predicted_ratings, num_recommendations=5):
    user_row = user_id - 1  # Adjust for zero-based indexing
    sorted_movies = np.argsort(predicted_ratings[user_row])[::-1]  # Sort by predicted rating
    return sorted_movies[:num_recommendations]

# Example recommendation
user_id = 10
recommended_movies = recommend_for_user(user_id, predicted_ratings)
print(f"Recommended movies for user {user_id}: {recommended_movies}")

Recommended movies for user 10: [6693  277 7355  314 4791]


# Variational Autoencoder

In [34]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset

# Load MovieLens dataset
df = pd.read_csv("./ml-latest-small/ratings.csv")
num_users = df['userId'].nunique()
num_items = df['movieId'].nunique()
df['cat_movie'] = df['movieId'].astype("category").cat.codes

# Convert user-item interactions to a matrix
user_item_matrix = np.zeros((num_users, num_items))
for row in df.itertuples():
    user_item_matrix[row.userId - 1, row.cat_movie - 1] = row.rating

# Convert to PyTorch tensor
user_item_matrix = torch.tensor(user_item_matrix, dtype=torch.float32)

# Split into train and test (80-20 split)
train_matrix = user_item_matrix.clone()
test_matrix = user_item_matrix.clone()
test_matrix[np.random.rand(*test_matrix.shape) < 0.8] = 0  # Mask 80% of interactions for training


In [35]:
class VAERecommender(nn.Module):
    def __init__(self, num_items, latent_dim=50):
        super(VAERecommender, self).__init__()
        self.encoder = nn.Sequential(
            nn.Linear(num_items, 200),
            nn.ReLU(),
            nn.Linear(200, 100),
            nn.ReLU()
        )
        self.mu = nn.Linear(100, latent_dim)
        self.logvar = nn.Linear(100, latent_dim)

        self.decoder = nn.Sequential(
            nn.Linear(latent_dim, 100),
            nn.ReLU(),
            nn.Linear(100, 200),
            nn.ReLU(),
            nn.Linear(200, num_items),
            nn.Sigmoid()  # Output probabilities of items
        )

    def forward(self, x):
        encoded = self.encoder(x)
        mu, logvar = self.mu(encoded), self.logvar(encoded)
        std = torch.exp(0.5 * logvar)
        eps = torch.randn_like(std)
        z = mu + eps * std  # Reparameterization trick
        reconstructed = self.decoder(z)
        return reconstructed, mu, logvar


In [36]:
def vae_loss(reconstructed, original, mu, logvar):
    reconstruction_loss = nn.MSELoss()(reconstructed, original)
    kl_divergence = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
    return reconstruction_loss + 0.0001 * kl_divergence  # Small weight on KL term

In [37]:
# Define model, optimizer
latent_dim = 50
model = VAERecommender(num_items, latent_dim=latent_dim)
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Move to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
train_matrix = train_matrix.to(device)

# Training loop
epochs = 30
for epoch in range(epochs):
    model.train()
    optimizer.zero_grad()
    
    reconstructed, mu, logvar = model(train_matrix)
    loss = vae_loss(reconstructed, train_matrix, mu, logvar)
    
    loss.backward()
    optimizer.step()
    
    print(f"Epoch {epoch+1}/{epochs}, Loss: {loss.item():.4f}")


Epoch 1/30, Loss: 0.4308
Epoch 2/30, Loss: 0.4269
Epoch 3/30, Loss: 0.4188
Epoch 4/30, Loss: 0.4104
Epoch 5/30, Loss: 0.4022
Epoch 6/30, Loss: 0.3938
Epoch 7/30, Loss: 0.3843
Epoch 8/30, Loss: 0.3733
Epoch 9/30, Loss: 0.3605
Epoch 10/30, Loss: 0.3484
Epoch 11/30, Loss: 0.3337
Epoch 12/30, Loss: 0.3185
Epoch 13/30, Loss: 0.3033
Epoch 14/30, Loss: 0.2879
Epoch 15/30, Loss: 0.2740
Epoch 16/30, Loss: 0.2595
Epoch 17/30, Loss: 0.2468
Epoch 18/30, Loss: 0.2375
Epoch 19/30, Loss: 0.2290
Epoch 20/30, Loss: 0.2227
Epoch 21/30, Loss: 0.2175
Epoch 22/30, Loss: 0.2150
Epoch 23/30, Loss: 0.2123
Epoch 24/30, Loss: 0.2106
Epoch 25/30, Loss: 0.2097
Epoch 26/30, Loss: 0.2089
Epoch 27/30, Loss: 0.2083
Epoch 28/30, Loss: 0.2080
Epoch 29/30, Loss: 0.2078
Epoch 30/30, Loss: 0.2075


In [38]:
def recommend_for_user(user_id, model, num_recommendations=5):
    model.eval()
    user_vector = train_matrix[user_id].unsqueeze(0).to(device)

    with torch.no_grad():
        predicted_ratings, _, _ = model(user_vector)

    recommended_movies = predicted_ratings.cpu().numpy().argsort()[::-1][:num_recommendations]
    return recommended_movies

# Example recommendation
user_id = 10  # Example user
recommended_movies = recommend_for_user(user_id, model)
print(f"Recommended movies for user {user_id}: {recommended_movies}")


Recommended movies for user 10: [[8899 8029 7159 ...  313  896 2143]]


# Neural Collaborative filtering

In [5]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split

# Load dataset (MovieLens sample)
df = pd.read_csv("./ml-latest-small/ratings.csv")

# Encode users and items as integers
df['userId'] = df['userId'].astype("category").cat.codes
df['movieId'] = df['movieId'].astype("category").cat.codes

# Train-test split
train_data, test_data = train_test_split(df, test_size=0.2, random_state=42)

In [7]:
class MovieLensDataset(Dataset):
    def __init__(self, df):
        self.users = torch.tensor(df['userId'].values, dtype=torch.long)
        self.items = torch.tensor(df['movieId'].values, dtype=torch.long)
        self.ratings = torch.tensor(df['rating'].values, dtype=torch.float32)

    def __len__(self):
        return len(self.ratings)

    def __getitem__(self, idx):
        return self.users[idx], self.items[idx], self.ratings[idx]

# Create DataLoaders
train_dataset = MovieLensDataset(train_data)
test_dataset = MovieLensDataset(test_data)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

In [8]:
class NCF(nn.Module):
    def __init__(self, num_users, num_items, embedding_dim=50):
        super(NCF, self).__init__()
        self.user_embedding = nn.Embedding(num_users, embedding_dim)
        self.item_embedding = nn.Embedding(num_items, embedding_dim)

        self.fc_layers = nn.Sequential(
            nn.Linear(embedding_dim * 2, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, 1)  # Output rating prediction
        )

    def forward(self, user_ids, item_ids):
        user_embedded = self.user_embedding(user_ids)
        item_embedded = self.item_embedding(item_ids)
        x = torch.cat([user_embedded, item_embedded], dim=-1)
        x = self.fc_layers(x)
        return x.squeeze()

# Get number of unique users and items
num_users = df['userId'].nunique()
num_items = df['movieId'].nunique()

# Instantiate model
model = NCF(num_users, num_items, embedding_dim=50)


In [9]:
# Define optimizer and loss function
optimizer = optim.Adam(model.parameters(), lr=0.001)
loss_fn = nn.MSELoss()

# Move model to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Training loop
epochs = 10
for epoch in range(epochs):
    model.train()
    total_loss = 0

    for users, items, ratings in train_loader:
        users, items, ratings = users.to(device), items.to(device), ratings.to(device)

        optimizer.zero_grad()
        predictions = model(users, items)
        loss = loss_fn(predictions, ratings)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    print(f"Epoch {epoch+1}/{epochs}, Loss: {total_loss / len(train_loader):.4f}")


Epoch 1/10, Loss: 1.0693
Epoch 2/10, Loss: 0.8170
Epoch 3/10, Loss: 0.7517
Epoch 4/10, Loss: 0.7078
Epoch 5/10, Loss: 0.6709
Epoch 6/10, Loss: 0.6381
Epoch 7/10, Loss: 0.6075
Epoch 8/10, Loss: 0.5756
Epoch 9/10, Loss: 0.5433
Epoch 10/10, Loss: 0.5105


In [10]:
from sklearn.metrics import mean_squared_error

model.eval()
all_preds, all_actuals = [], []

with torch.no_grad():
    for users, items, ratings in test_loader:
        users, items, ratings = users.to(device), items.to(device), ratings.to(device)
        predictions = model(users, items)
        
        all_preds.extend(predictions.cpu().numpy())
        all_actuals.extend(ratings.cpu().numpy())

rmse = np.sqrt(mean_squared_error(all_actuals, all_preds))
print(f"Test RMSE: {rmse:.4f}")

Test RMSE: 0.9308


In [11]:
def recommend_for_user(user_id, model, num_recommendations=5):
    model.eval()
    user_tensor = torch.tensor([user_id] * num_items, dtype=torch.long).to(device)
    item_tensor = torch.arange(num_items, dtype=torch.long).to(device)

    with torch.no_grad():
        scores = model(user_tensor, item_tensor)

    top_items = scores.cpu().numpy().argsort()[-num_recommendations:][::-1]
    return top_items

user_id = 10  # Example user ID
recommended_items = recommend_for_user(user_id, model)
print(f"Recommended items for user {user_id}: {recommended_items}")

Recommended items for user 10: [5875 2409 7650 2645 2934]


# Attention

In [12]:
import torch
import torch.nn as nn
import torch.optim as optim

class AttentionLayer(nn.Module):
    def __init__(self, embedding_dim):
        super(AttentionLayer, self).__init__()
        self.attention = nn.Linear(embedding_dim * 2, 1)  # Computes attention scores

    def forward(self, user_emb, item_emb):
        combined = torch.cat([user_emb, item_emb], dim=-1)  # Concatenate embeddings
        attn_weights = torch.sigmoid(self.attention(combined))  # Compute attention scores
        weighted_emb = attn_weights * combined  # Apply attention
        return weighted_emb

class NCFWithAttention(nn.Module):
    def __init__(self, num_users, num_items, embedding_dim=50):
        super(NCFWithAttention, self).__init__()
        self.user_embedding = nn.Embedding(num_users, embedding_dim)
        self.item_embedding = nn.Embedding(num_items, embedding_dim)

        self.attention = AttentionLayer(embedding_dim)

        self.fc_layers = nn.Sequential(
            nn.Linear(embedding_dim * 2, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, 1)  # Output rating prediction
        )

    def forward(self, user_ids, item_ids):
        user_embedded = self.user_embedding(user_ids)
        item_embedded = self.item_embedding(item_ids)

        weighted_features = self.attention(user_embedded, item_embedded)  # Apply attention
        output = self.fc_layers(weighted_features)
        return output.squeeze()

In [13]:
# Define model
num_users = df['userId'].nunique()
num_items = df['movieId'].nunique()
model = NCFWithAttention(num_users, num_items, embedding_dim=50)

# Define optimizer and loss function
optimizer = optim.Adam(model.parameters(), lr=0.001)
loss_fn = nn.MSELoss()

# Move model to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Training loop
epochs = 10
for epoch in range(epochs):
    model.train()
    total_loss = 0

    for users, items, ratings in train_loader:
        users, items, ratings = users.to(device), items.to(device), ratings.to(device)

        optimizer.zero_grad()
        predictions = model(users, items)
        loss = loss_fn(predictions, ratings)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    print(f"Epoch {epoch+1}/{epochs}, Loss: {total_loss / len(train_loader):.4f}")

Epoch 1/10, Loss: 1.1970
Epoch 2/10, Loss: 0.8338
Epoch 3/10, Loss: 0.7651
Epoch 4/10, Loss: 0.7199
Epoch 5/10, Loss: 0.6833
Epoch 6/10, Loss: 0.6522
Epoch 7/10, Loss: 0.6239
Epoch 8/10, Loss: 0.5963
Epoch 9/10, Loss: 0.5695
Epoch 10/10, Loss: 0.5396


In [14]:
from sklearn.metrics import mean_squared_error

model.eval()
all_preds, all_actuals = [], []

with torch.no_grad():
    for users, items, ratings in test_loader:
        users, items, ratings = users.to(device), items.to(device), ratings.to(device)
        predictions = model(users, items)
        
        all_preds.extend(predictions.cpu().numpy())
        all_actuals.extend(ratings.cpu().numpy())

rmse = np.sqrt(mean_squared_error(all_actuals, all_preds))
print(f"Test RMSE: {rmse:.4f}")

Test RMSE: 0.9243


In [15]:
def recommend_for_user(user_id, model, num_recommendations=5):
    model.eval()
    user_tensor = torch.tensor([user_id] * num_items, dtype=torch.long).to(device)
    item_tensor = torch.arange(num_items, dtype=torch.long).to(device)

    with torch.no_grad():
        scores = model(user_tensor, item_tensor)

    top_items = scores.cpu().numpy().argsort()[-num_recommendations:][::-1]
    return top_items

user_id = 10  # Example user ID
recommended_items = recommend_for_user(user_id, model)
print(f"Recommended items for user {user_id}: {recommended_items}")

Recommended items for user 10: [5862 6403 6298 6680 9004]


# Self-Attention

In [16]:
import torch
import torch.nn as nn
import torch.optim as optim

class SelfAttentionLayer(nn.Module):
    def __init__(self, embedding_dim, num_heads=2):
        super(SelfAttentionLayer, self).__init__()
        self.attention = nn.MultiheadAttention(embed_dim=embedding_dim, num_heads=num_heads, batch_first=True)

    def forward(self, user_emb, item_emb):
        combined = torch.cat([user_emb.unsqueeze(1), item_emb.unsqueeze(1)], dim=1)  # Shape: (batch, 2, embedding_dim)
        attn_output, _ = self.attention(combined, combined, combined)  # Self-attention
        return attn_output.mean(dim=1)  # Aggregate attention outputs


In [17]:
class TransformerRecommender(nn.Module):
    def __init__(self, num_users, num_items, embedding_dim=50, num_heads=2):
        super(TransformerRecommender, self).__init__()
        self.user_embedding = nn.Embedding(num_users, embedding_dim)
        self.item_embedding = nn.Embedding(num_items, embedding_dim)

        self.self_attention = SelfAttentionLayer(embedding_dim, num_heads)

        self.fc_layers = nn.Sequential(
            nn.Linear(embedding_dim, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, 1)  # Output rating prediction
        )

    def forward(self, user_ids, item_ids):
        user_embedded = self.user_embedding(user_ids)
        item_embedded = self.item_embedding(item_ids)

        attended_features = self.self_attention(user_embedded, item_embedded)  # Self-Attention
        output = self.fc_layers(attended_features)
        return output.squeeze()


In [18]:
# Get the number of unique users and items
num_users = df['userId'].nunique()
num_items = df['movieId'].nunique()

# Define model
model = TransformerRecommender(num_users, num_items, embedding_dim=50, num_heads=2)

# Define optimizer and loss function
optimizer = optim.Adam(model.parameters(), lr=0.001)
loss_fn = nn.MSELoss()

# Move model to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Training loop
epochs = 10
for epoch in range(epochs):
    model.train()
    total_loss = 0

    for users, items, ratings in train_loader:
        users, items, ratings = users.to(device), items.to(device), ratings.to(device)

        optimizer.zero_grad()
        predictions = model(users, items)
        loss = loss_fn(predictions, ratings)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    print(f"Epoch {epoch+1}/{epochs}, Loss: {total_loss / len(train_loader):.4f}")


Epoch 1/10, Loss: 1.2665
Epoch 2/10, Loss: 0.8947
Epoch 3/10, Loss: 0.8208
Epoch 4/10, Loss: 0.7602
Epoch 5/10, Loss: 0.7080
Epoch 6/10, Loss: 0.6609
Epoch 7/10, Loss: 0.6216
Epoch 8/10, Loss: 0.5873
Epoch 9/10, Loss: 0.5540
Epoch 10/10, Loss: 0.5238


In [19]:
from sklearn.metrics import mean_squared_error

model.eval()
all_preds, all_actuals = [], []

with torch.no_grad():
    for users, items, ratings in test_loader:
        users, items, ratings = users.to(device), items.to(device), ratings.to(device)
        predictions = model(users, items)
        
        all_preds.extend(predictions.cpu().numpy())
        all_actuals.extend(ratings.cpu().numpy())

rmse = np.sqrt(mean_squared_error(all_actuals, all_preds))
print(f"Test RMSE: {rmse:.4f}")


Test RMSE: 0.9497


In [20]:
def recommend_for_user(user_id, model, num_recommendations=5):
    model.eval()
    user_tensor = torch.tensor([user_id] * num_items, dtype=torch.long).to(device)
    item_tensor = torch.arange(num_items, dtype=torch.long).to(device)

    with torch.no_grad():
        scores = model(user_tensor, item_tensor)

    top_items = scores.cpu().numpy().argsort()[-num_recommendations:][::-1]
    return top_items

user_id = 10  # Example user ID
recommended_items = recommend_for_user(user_id, model)
print(f"Recommended items for user {user_id}: {recommended_items}")

Recommended items for user 10: [7435  924  405 7683 1648]


# BERT and positional encoding

In [21]:
import torch
import torch.nn as nn
import torch.optim as optim
import math

class PositionalEncoding(nn.Module):
    def __init__(self, embedding_dim, max_len=5000):
        super(PositionalEncoding, self).__init__()
        pe = torch.zeros(max_len, embedding_dim)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, embedding_dim, 2).float() * (-math.log(10000.0) / embedding_dim))
        
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        
        self.pe = pe.unsqueeze(0)  # Shape: (1, max_len, embedding_dim)

    def forward(self, x):
        return x + self.pe[:, :x.size(1), :].to(x.device)  # Add positional encodings


In [22]:
class BERTRecommender(nn.Module):
    def __init__(self, num_users, num_items, embedding_dim=50, num_heads=2, num_transformer_layers=2):
        super(BERTRecommender, self).__init__()
        self.user_embedding = nn.Embedding(num_users, embedding_dim)
        self.item_embedding = nn.Embedding(num_items, embedding_dim)
        self.positional_encoding = PositionalEncoding(embedding_dim)

        encoder_layer = nn.TransformerEncoderLayer(d_model=embedding_dim, nhead=num_heads)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_transformer_layers)

        self.fc_layers = nn.Sequential(
            nn.Linear(embedding_dim, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, 1)  # Output rating prediction
        )

    def forward(self, user_ids, item_ids):
        user_emb = self.user_embedding(user_ids).unsqueeze(1)  # Shape: (batch, 1, embedding_dim)
        item_emb = self.item_embedding(item_ids).unsqueeze(1)  # Shape: (batch, 1, embedding_dim)

        combined_emb = torch.cat([user_emb, item_emb], dim=1)  # Shape: (batch, 2, embedding_dim)
        combined_emb = self.positional_encoding(combined_emb)

        transformer_output = self.transformer_encoder(combined_emb)  # Shape: (batch, 2, embedding_dim)
        attended_features = transformer_output.mean(dim=1)  # Aggregate across positions

        return self.fc_layers(attended_features).squeeze()


In [23]:
# Define model
num_users = df['userId'].nunique()
num_items = df['movieId'].nunique()
model = BERTRecommender(num_users, num_items, embedding_dim=50, num_heads=2, num_transformer_layers=2)

# Define optimizer and loss function
optimizer = optim.Adam(model.parameters(), lr=0.001)
loss_fn = nn.MSELoss()

# Move model to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Training loop
epochs = 10
for epoch in range(epochs):
    model.train()
    total_loss = 0

    for users, items, ratings in train_loader:
        users, items, ratings = users.to(device), items.to(device), ratings.to(device)

        optimizer.zero_grad()
        predictions = model(users, items)
        loss = loss_fn(predictions, ratings)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    print(f"Epoch {epoch+1}/{epochs}, Loss: {total_loss / len(train_loader):.4f}")




Epoch 1/10, Loss: 1.0799
Epoch 2/10, Loss: 0.8351
Epoch 3/10, Loss: 0.7791
Epoch 4/10, Loss: 0.7457
Epoch 5/10, Loss: 0.7221
Epoch 6/10, Loss: 0.7069
Epoch 7/10, Loss: 0.6949
Epoch 8/10, Loss: 0.6851
Epoch 9/10, Loss: 0.6748
Epoch 10/10, Loss: 0.6680


In [24]:
from sklearn.metrics import mean_squared_error

model.eval()
all_preds, all_actuals = [], []

with torch.no_grad():
    for users, items, ratings in test_loader:
        users, items, ratings = users.to(device), items.to(device), ratings.to(device)
        predictions = model(users, items)
        
        all_preds.extend(predictions.cpu().numpy())
        all_actuals.extend(ratings.cpu().numpy())

rmse = np.sqrt(mean_squared_error(all_actuals, all_preds))
print(f"Test RMSE: {rmse:.4f}")


Test RMSE: 0.9065


In [25]:
def recommend_for_user(user_id, model, num_recommendations=5):
    model.eval()
    user_tensor = torch.tensor([user_id] * num_items, dtype=torch.long).to(device)
    item_tensor = torch.arange(num_items, dtype=torch.long).to(device)

    with torch.no_grad():
        scores = model(user_tensor, item_tensor)

    top_items = scores.cpu().numpy().argsort()[-num_recommendations:][::-1]
    return top_items

user_id = 10  # Example user ID
recommended_items = recommend_for_user(user_id, model)
print(f"Recommended items for user {user_id}: {recommended_items}")


Recommended items for user 10: [ 602  659 4900 7742 7793]
