In [5]:
import io
import os
import zipfile

import numpy as np
import pandas as pd
import requests
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader

In [None]:
# Run this cell once to fetch the MovieLens 100k dataset.
# It is safe to re-run: the download is skipped when the data is already on disk.
movielens_url = "http://files.grouplens.org/datasets/movielens/ml-100k.zip"
output_dir = "."
dataset_dir = os.path.join(output_dir, "ml-100k")

if os.path.exists(os.path.join(dataset_dir, "u.data")):
    # The ratings file is what the loading cell reads first, so its presence
    # is used as the marker that a previous download succeeded.
    print(f"Dataset already present in {dataset_dir}, skipping download.")
else:
    print(f"Downloading MovieLens dataset from {movielens_url}...")
    # A timeout keeps the kernel from hanging forever on a stalled connection.
    response = requests.get(movielens_url, timeout=60)

    if response.status_code == 200:
        # Unzip straight from memory; the loading cell expects the files
        # under `ml-100k/` inside output_dir after extraction.
        with zipfile.ZipFile(io.BytesIO(response.content)) as z:
            z.extractall(output_dir)
        print(f"Dataset downloaded and extracted to {output_dir}")
    else:
        print(f"Failed to download dataset. Status code: {response.status_code}")

In [6]:
# Load the three MovieLens 100k tables from the extracted `ml-100k/` folder.

# u.data (tab-separated): user id, item id, rating, timestamp
ratings_cols = ['user_id', 'item_id', 'rating', 'timestamp']
ratings_df = pd.read_csv('ml-100k/u.data', names=ratings_cols, sep='\t')

# u.item (pipe-separated): five descriptive fields followed by 19 genre columns
movies_cols = [
    'item_id', 'title', 'release_date', 'video_release_date', 'imdb_url',
    'unknown', 'Action', 'Adventure', 'Animation', 'Children', 'Comedy',
    'Crime', 'Documentary', 'Drama', 'Fantasy', 'Film-Noir', 'Horror',
    'Musical', 'Mystery', 'Romance', 'Sci-Fi', 'Thriller', 'War', 'Western',
]
movies_df = pd.read_csv('ml-100k/u.item', names=movies_cols, sep='|', encoding='latin-1')

# u.user (pipe-separated): user id, age, gender, occupation, zip code
users_cols = ['user_id', 'age', 'gender', 'occupation', 'zip_code']
users_df = pd.read_csv('ml-100k/u.user', names=users_cols, sep='|')

# Preview the first rows of each table, in the order they were loaded.
for heading, frame in (
    ("Ratings DataFrame head:", ratings_df),
    ("\nMovies DataFrame head:", movies_df),
    ("\nUsers DataFrame head:", users_df),
):
    print(heading)
    display(frame.head())


Ratings DataFrame head:


Unnamed: 0,user_id,item_id,rating,timestamp
0,196,242,3,881250949
1,186,302,3,891717742
2,22,377,1,878887116
3,244,51,2,880606923
4,166,346,1,886397596



Movies DataFrame head:


Unnamed: 0,item_id,title,release_date,video_release_date,imdb_url,unknown,Action,Adventure,Animation,Children,...,Fantasy,Film-Noir,Horror,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
0,1,Toy Story (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Toy%20Story%2...,0,0,0,1,1,...,0,0,0,0,0,0,0,0,0,0
1,2,GoldenEye (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?GoldenEye%20(...,0,1,1,0,0,...,0,0,0,0,0,0,0,1,0,0
2,3,Four Rooms (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Four%20Rooms%...,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
3,4,Get Shorty (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Get%20Shorty%...,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,5,Copycat (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Copycat%20(1995),0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0



Users DataFrame head:


Unnamed: 0,user_id,age,gender,occupation,zip_code
0,1,24,M,technician,85711
1,2,53,F,other,94043
2,3,23,M,writer,32067
3,4,24,M,technician,43537
4,5,33,F,other,15213


In [27]:
# Prepare lookup tables for training: each raw user / movie id that appears
# in ratings_df is assigned its rank in sorted order as a 0-based index.

unique_users = sorted(ratings_df['user_id'].unique())
user_to_index = dict(zip(unique_users, range(len(unique_users))))

unique_movies = sorted(ratings_df['item_id'].unique())
movie_to_index = dict(zip(unique_movies, range(len(unique_movies))))

In [28]:
# Translate the raw user and movie IDs in ratings_df into the 0-based indices
# defined by user_to_index / movie_to_index (vectorised lookup via Series.map).
user_indices = torch.tensor(ratings_df['user_id'].map(user_to_index).to_numpy(), dtype=torch.long)
movie_indices = torch.tensor(ratings_df['item_id'].map(movie_to_index).to_numpy(), dtype=torch.long)
ratings_tensor = torch.tensor(ratings_df['rating'].to_numpy(), dtype=torch.float32)

# One sample = (user index, movie index, rating).
dataset = TensorDataset(user_indices, movie_indices, ratings_tensor)

# Define batch size
batch_size = 64

# 80/20 train/test split; the test part takes the remainder so the two
# sizes always add up to len(dataset).
train_ratio = 0.8
train_size = int(train_ratio * len(dataset))
test_size = len(dataset) - train_size

# A dedicated, seeded generator makes the split identical on every run
# without touching the global torch RNG state.
split_seed = 42
train_dataset, test_dataset = torch.utils.data.random_split(
    dataset,
    [train_size, test_size],
    generator=torch.Generator().manual_seed(split_seed),
)

# Only the training data is reshuffled each epoch.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)


In [29]:
# Pull one batch from each loader to have concrete tensors for quick checks.
train_batch_iter = iter(train_loader)
user_ids_batch, movie_ids_batch, ratings_batch = next(train_batch_iter)

test_batch_iter = iter(test_loader)
user_test, movie_test, ratings_test = next(test_batch_iter)

In [30]:
# Model dimensions: latent factor count and the two embedding vocabulary sizes.
n_latent = 5
n_users, n_movies = len(user_to_index), len(movie_to_index)

In [None]:
class AttentionFM(nn.Module):
    """Biased matrix-factorisation model with a learned gate on the interaction.

    For a (user, movie) pair the prediction is

        global_bias + user_bias + movie_bias + sum_k a * (p_u[k] * q_i[k])

    where ``p_u`` / ``q_i`` are the user / movie embeddings and ``a`` is a
    single scalar score per pair, produced by a small ReLU network over the
    two embeddings. The score is used as-is (no softmax or sigmoid).

    Args:
        user_vocab_size: Number of distinct user indices.
        movie_vocab_size: Number of distinct movie indices.
        embedding_dim: Size of the user and movie embedding vectors.
        attention_size: Hidden width of the attention network.
    """

    def __init__(self, user_vocab_size, movie_vocab_size, embedding_dim, attention_size):
        super().__init__()
        # Parameters are created in a fixed order so that a given RNG seed
        # always yields the same initial weights.
        self.global_bias = nn.Parameter(torch.zeros(1))
        self.embedding_user = nn.Embedding(user_vocab_size, embedding_dim)
        self.user_bias = nn.Embedding(user_vocab_size, 1)
        self.embedding_movie = nn.Embedding(movie_vocab_size, embedding_dim)
        self.movie_bias = nn.Embedding(movie_vocab_size, 1)
        self.attention_user = nn.Linear(embedding_dim, attention_size)
        self.attention_movie = nn.Linear(embedding_dim, attention_size)
        self.attention_v = nn.Linear(attention_size, 1)

    def forward(self, user_indices, movie_indices):
        """Predict a rating for each (user, movie) index pair.

        Args:
            user_indices: Long tensor of user indices, shape ``(batch,)``.
            movie_indices: Long tensor of movie indices, same shape.

        Returns:
            Float tensor of predictions with the same shape as the index
            tensors. A batch of one yields shape ``(1,)``, not a 0-d scalar.
        """
        user_embedding = self.embedding_user(user_indices)
        movie_embedding = self.embedding_movie(movie_indices)

        user_attention_out = self.attention_user(user_embedding)
        movie_attention_out = self.attention_movie(movie_embedding)

        # One unnormalised attention score per pair, shape (batch, 1); it
        # broadcasts over the embedding dimension below.
        attention = self.attention_v(torch.relu(user_attention_out + movie_attention_out))

        # Drop only the trailing size-1 axis. A bare squeeze() would also
        # remove the batch axis when the batch holds a single sample.
        b_u = self.user_bias(user_indices).squeeze(-1)
        b_i = self.movie_bias(movie_indices).squeeze(-1)

        # Attention-scaled element-wise product, summed over the latent axis.
        weighted_interaction = (attention * (user_embedding * movie_embedding)).sum(dim=-1)

        # global_bias has shape (1,) and broadcasts across the batch.
        return self.global_bias + b_u + b_i + weighted_interaction

In [35]:
# Build the model; the attention hidden width is set equal to the latent size.
model = AttentionFM(n_users, n_movies, n_latent, attention_size=n_latent)
# Smoke test: one forward pass on the sample batch before training starts.
output_predictions = model(user_ids_batch, movie_ids_batch)
loss_function = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)

epochs = 10
for epoch in range(epochs):
    # Ensure training mode even if this cell is re-run after the eval cell.
    model.train()
    total_loss = 0.0
    n_samples = 0
    for user_indices_batch, movie_indices_batch, ratings_batch in train_loader:
        optimizer.zero_grad()
        predictions = model(user_indices_batch, movie_indices_batch)
        loss = loss_function(predictions, ratings_batch)
        loss.backward()
        optimizer.step()
        # loss is the batch mean; weight it by the batch size so the epoch
        # figure is a true per-sample MSE even if the last batch is smaller.
        batch_len = ratings_batch.size(0)
        total_loss += loss.item() * batch_len
        n_samples += batch_len
    avg_loss = total_loss / n_samples
    print(f"Epoch {epoch+1}/{epochs}, MSE: {avg_loss:.4f}")

Epoch 1/10, MSE: 3.4150
Epoch 2/10, MSE: 2.9518
Epoch 3/10, MSE: 2.7699
Epoch 4/10, MSE: 2.6143
Epoch 5/10, MSE: 2.4774
Epoch 6/10, MSE: 2.3570
Epoch 7/10, MSE: 2.2506
Epoch 8/10, MSE: 2.1564
Epoch 9/10, MSE: 2.0716
Epoch 10/10, MSE: 1.9973


In [36]:
# Compute the MSE on the held-out test split.
model.eval()

with torch.no_grad():
    total_test_loss = 0.0
    n_test_samples = 0
    for user_indices_batch, movie_indices_batch, ratings_batch in test_loader:
        predictions = model(user_indices_batch, movie_indices_batch)
        loss = loss_function(predictions, ratings_batch)
        # loss is a per-batch mean. The final test batch can be smaller than
        # batch_size, so weight by batch size to get the exact per-sample
        # MSE instead of a mean of batch means.
        batch_len = ratings_batch.size(0)
        total_test_loss += loss.item() * batch_len
        n_test_samples += batch_len
    avg_test_loss = total_test_loss / n_test_samples
    print(f"Test MSE: {avg_test_loss:.4f}")


Test MSE: 1.9231
