In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import pandas as pd
from torch.utils.data import TensorDataset, DataLoader, Subset
import random

In [None]:
# Read the MovieLens ratings CSV into a DataFrame and preview its first rows.
ratings_csv_path = 'data/MovieLens/rating.csv'
rating_df = pd.read_csv(ratings_csv_path)
rating_df.head()

In [None]:
# Divide every rating by the largest rating so that the largest one maps to 1.0.
# The vectorised Series reductions are used instead of the builtin min()/max():
# the builtins walk the column element by element in Python (slow on a large
# table) and give order-dependent results when NaN is present, whereas
# Series.min()/max() skip NaN by default.
rating_max = rating_df.rating.max()
rating_df['normalized_rating'] = rating_df.rating / rating_max

# Sanity check: print the smallest and largest normalised value.
print(rating_df.normalized_rating.min())
print(rating_df.normalized_rating.max())

In [None]:
sample_portion = 0.03  # fraction of the rating rows kept for training
sample_seed = 42       # fixed seed so the same subset is drawn on every run

# Shift the ids so the smallest id becomes index 0. Series.min() is the
# vectorised reduction; the builtin min() iterates the column in Python.
# NOTE(review): if the ids are not contiguous, this leaves unused index values
# (and therefore unused embedding rows downstream) - confirm whether a dense
# re-indexing such as pd.factorize would be preferable.
rating_df['user_idx'] = rating_df.userId - rating_df.userId.min()
rating_df['movie_idx'] = rating_df.movieId - rating_df.movieId.min()

# Draw the random subset of rows used for training.
sampled_ratings = rating_df[['user_idx', 'movie_idx', 'normalized_rating']].sample(
    frac=sample_portion,
    random_state=sample_seed,
)

# Columns are packed as (user_idx, movie_idx, normalized_rating) in a single
# float32 tensor; the indices are cast back to integers by the consumer.
# float32 represents integers exactly only below 2**24, so index values must
# stay under that bound.
movie_lens_tensor = torch.tensor(sampled_ratings.to_numpy(), dtype=torch.float)
movie_lens_dataset = TensorDataset(movie_lens_tensor)
movie_lens_dataloader = DataLoader(movie_lens_dataset, batch_size=500, shuffle=True)
print(f"Input size={len(movie_lens_dataset)}")

In [None]:
class NCF(nn.Module):
    """Embedding + MLP model that scores a (user, movie) index pair.

    A user index and a movie index are each looked up in their own embedding
    table; the two vectors are concatenated and passed through a small MLP
    whose last layer is a sigmoid.

    Args:
        user_dim: number of rows in the user embedding table.
        movie_dim: number of rows in the movie embedding table.
        embedding_dim: width of each embedding vector.
        dim_scale: accepted for compatibility; not used by this implementation.
    """

    def __init__(self, user_dim, movie_dim, embedding_dim=128, dim_scale=3):
        super().__init__()
        self.embedding_usr = nn.Embedding(user_dim, embedding_dim)
        self.embedding_movie = nn.Embedding(movie_dim, embedding_dim)

        # The MLP input is the user and movie embeddings side by side.
        mlp_layers = [
            nn.Linear(2 * embedding_dim, 128),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.BatchNorm1d(64),
            nn.ReLU(),
            nn.Linear(64, 1),
            nn.Sigmoid(),
        ]
        self.ncf = nn.Sequential(*mlp_layers)

    def forward(self, x):
        """Score a batch of rows.

        Args:
            x: 2-D tensor whose column 0 holds user indices and column 1 holds
                movie indices; any further columns are ignored here.

        Returns:
            Tensor of shape (batch, 1) with values produced by the final sigmoid.
        """
        # Indices may arrive as floats; cast to integers for the lookup.
        user_ids = x[:, 0].int()
        movie_ids = x[:, 1].int()
        user_vec = self.embedding_usr(user_ids)
        movie_vec = self.embedding_movie(movie_ids)
        joined = torch.cat((user_vec, movie_vec), dim=1)
        return self.ncf(joined)


In [None]:
# Each embedding table needs one row per index value, i.e. (largest index + 1).
# Series.max() is the vectorised reduction (the builtin max() iterates the
# column in Python); int() turns the numpy scalar into a plain Python int.
user_dim = int(rating_df.user_idx.max()) + 1
movie_dim = int(rating_df.movie_idx.max()) + 1
ncf = NCF(user_dim=user_dim, movie_dim=movie_dim)
epochs = 200
optimizer = optim.Adam(ncf.parameters(), lr=1e-3)
loss_func = torch.nn.MSELoss()

ncf.train()  # make training mode explicit (BatchNorm uses batch statistics)
for epoch in range(epochs):
    err_loss = 0  # sum of the per-batch losses over this epoch
    # The dataset wraps a single tensor, so every item is a 1-element sequence.
    for (batch,) in movie_lens_dataloader:
        prediction = ncf(batch)
        # Column 2 holds the normalised rating. Give it the same shape as the
        # prediction: comparing (B, 1) against (B,) would broadcast to (B, B)
        # and compute the error of every prediction against every target.
        target = batch[:, 2].reshape(prediction.shape)
        loss = loss_func(prediction, target)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        err_loss += loss.item()
    print(f"Epoch={epoch}: loss={err_loss}")