# Collaborative filtering project

In this project, the task is to create a paper recommendation system. The system consists of 10,000 scientists and 1,000 papers. Scientists give ratings between 1–5 to the papers that they read. Since not all scientists have read every paper, we only have a limited number of observations of these ratings. Additionally, each scientist has a wishlist of papers that they would like to read in the future. Your task is to fill in the missing observations using the provided rating and wishlist data, such that we can recommend papers to scientists that we expect them to rate highly.

More specifically, there are three data sources:
 - `train_tbr.csv` containing wishlist data.
 - `train_ratings.csv` containing observed rating data.
 - `sample_submission.csv` containing (scientist, paper) pairs that have to be rated for the evaluation of your method.

The data is available at `/cluster/courses/cil/collaborative_filtering/data` and an environment has been prepared for you at `/cluster/courses/cil/envs/collaborative_filtering`. You can activate the environment in your shell by running:
```bash
conda activate /cluster/courses/cil/envs/collaborative_filtering
```
If you wish to use notebooks on the cluster, you need to set the Environment path to `/cluster/courses/cil/envs/collaborative_filtering/bin` and load the `cuda/12.6` module.

**Evaluation**: Your models are evaluated using the root mean-squared error (RMSE) metric. Your grade is determined by a linear interpolation between the easy (grade 4) and hard (grade 6) baselines.

**Rules**: You are only allowed to use the data provided in `train_tbr.csv` and `train_ratings.csv` to make your predictions of `sample_submission.csv`. You are not allowed to use external data sources. But, you are allowed to use pre-trained models, as long as they are available publicly. Furthermore, no external API calls are allowed, except for downloading the weights of pre-trained models.

**We will verify your code for plagiarism and using solutions from previous years.**

[Link to Kaggle competition](https://www.kaggle.com/competitions/ethz-cil-collaborative-filtering-2025)


In [None]:
from typing import Tuple, Callable

import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import root_mean_squared_error
import os

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

Make sure that results are reproducible by using a seed.

In [None]:
# One fixed seed applied to both the PyTorch and the NumPy global random generators.
SEED = 42
torch.manual_seed(SEED)
np.random.seed(SEED)

In [None]:
class SVDpp(nn.Module):
    """Matrix-factorisation rating model with bias terms.

    The prediction for a (scientist, paper) pair is the dot product of the two
    latent factor vectors plus a per-scientist bias, a per-paper bias and a
    constant global offset.

    NOTE(review): despite the class name, no implicit-feedback term is part of
    this model; only factors and biases are used.
    """

    def __init__(self, num_scientists: int = 10000, num_papers: int = 10000, emb_dim: int = 8):
        super().__init__()

        # Latent factor tables followed by the scalar bias tables. The creation
        # order is kept fixed so a seeded run draws the same initial weights.
        self.scientist_factors = nn.Embedding(num_scientists, emb_dim)
        self.paper_factors = nn.Embedding(num_papers, emb_dim)
        self.scientist_bias = nn.Embedding(num_scientists, 1)
        self.paper_bias = nn.Embedding(num_papers, 1)

        # Constant global offset (not trained) - TODO: maybe come up with smth better
        self.global_bias = nn.Parameter(torch.tensor([3.5]), requires_grad=False)

        # Weight initialisation - TODO: not tuned rn
        for factor_table in (self.scientist_factors, self.paper_factors):
            nn.init.normal_(factor_table.weight, std=0.1)
        for bias_table in (self.scientist_bias, self.paper_bias):
            nn.init.constant_(bias_table.weight, 0.0)

    def forward(self, scientist_ids, paper_ids):
        """Return one predicted rating per (scientist_ids[i], paper_ids[i]) pair."""
        user_vecs = self.scientist_factors(scientist_ids)
        item_vecs = self.paper_factors(paper_ids)

        # Dot product of the two factor vectors.
        dot = torch.sum(user_vecs * item_vecs, dim=1)

        # Bias lookups come back with a trailing singleton dimension; drop it.
        user_offset = self.scientist_bias(scientist_ids).squeeze()
        item_offset = self.paper_bias(paper_ids).squeeze()

        return dot + user_offset + item_offset + self.global_bias


## Helper functions

In [None]:
# Directory holding the CSV inputs. The commented-out value is the course-cluster
# location; the active value is a local relative path.
# DATA_DIR = "/cluster/courses/cil/collaborative_filtering/data"
DATA_DIR = "./data"


def read_data_df(
    test_size: float = 0.25, random_state=None
) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
    """Load the rating and wishlist CSVs and split the ratings into train/validation.

    Args:
        test_size: Fraction of the rating rows held out for validation
            (default 0.25, i.e. a 75/25 split).
        random_state: Forwarded to ``train_test_split``. With the default
            ``None`` the split draws from NumPy's global random state, so it is
            only repeatable if that state was seeded beforehand.

    Returns:
        ``(train_df, valid_df, implicit_df)``: the two rating splits, with the
        ``sid_pid`` column replaced by integer ``sid`` and ``pid`` columns, and
        the contents of ``train_tbr.csv`` exactly as read.
    """
    df = pd.read_csv(os.path.join(DATA_DIR, "train_ratings.csv"))
    implicit_df = pd.read_csv(os.path.join(DATA_DIR, "train_tbr.csv"))

    # "sid_pid" holds "<sid>_<pid>"; split it into two integer columns.
    df[["sid", "pid"]] = df["sid_pid"].str.split("_", expand=True)
    df = df.drop("sid_pid", axis=1)
    df["sid"] = df["sid"].astype(int)
    df["pid"] = df["pid"].astype(int)

    # Split into train and validation dataset
    train_df, valid_df = train_test_split(
        df, test_size=test_size, random_state=random_state
    )
    return train_df, valid_df, implicit_df


def read_data_matrix(df: pd.DataFrame) -> np.ndarray:
    """Pivot the long-format ratings into a dense 2-D array.

    Rows correspond to scientists (``sid``) and columns to papers (``pid``);
    pairs without a rating in *df* come out as NaN.
    """
    rating_table = df.pivot(index="sid", columns="pid", values="rating")
    return rating_table.values


def evaluate(valid_df: pd.DataFrame, pred_fn: Callable[[np.ndarray, np.ndarray], np.ndarray]) -> float:
    """Compute the validation RMSE of a prediction function.

    Inputs:
        valid_df: Validation data with ``sid``, ``pid`` and ``rating`` columns,
            e.g. as returned from read_data_df.
        pred_fn: Function that maps arrays of sid and pid to predicted ratings.

    Outputs: Root mean-squared error between the true and predicted ratings.
    """
    scientist_ids = valid_df["sid"].values
    paper_ids = valid_df["pid"].values
    predictions = pred_fn(scientist_ids, paper_ids)

    targets = valid_df["rating"].values
    return root_mean_squared_error(targets, predictions)


def make_submission(pred_fn: Callable[[np.ndarray, np.ndarray], np.ndarray], filename: os.PathLike):
    """Write a Kaggle submission CSV for the pairs listed in sample_submission.csv.

    Inputs:
        pred_fn: Function that maps arrays of sid and pid to a predicted score.
        filename: Destination path of the submission file.
    """
    submission = pd.read_csv(os.path.join(DATA_DIR, "sample_submission.csv"))

    # "sid_pid" holds "<sid>_<pid>"; column 0 is the scientist, column 1 the paper.
    id_parts = submission["sid_pid"].str.split("_", expand=True)
    sids, pids = (id_parts[position].astype(int).values for position in (0, 1))

    submission["rating"] = pred_fn(sids, pids)
    submission.to_csv(filename, index=False)

## Singular value decomposition

For the first method in this introduction, we will make use of the singular value decomposition (SVD) to construct the optimal rank-$k$ approximation (when measuring the Frobenius norm as error), according to the Eckart-Young theorem. Since the matrix needs to be fully observed in order to make use of SVD, we need to impute the missing values. In this case, we impute values with $3$.

In [None]:
def impute_values(mat: np.ndarray, fill_value: float = 3.0) -> np.ndarray:
    """Return a copy of *mat* in which every NaN is replaced by *fill_value*.

    Args:
        mat: Array with NaN marking unobserved entries.
        fill_value: Replacement for NaN entries (default 3.0).

    Returns:
        A new array; *mat* itself is left untouched. As with any call to
        ``np.nan_to_num``, infinite entries are replaced by very large finite
        numbers as well.
    """
    return np.nan_to_num(mat, nan=fill_value)

In [None]:
# Load the data once: rating train/validation splits plus the wishlist frame.
train_df, valid_df, implicit_df = read_data_df()

In [None]:
# Earlier baseline kept for reference (10k scientists, 1k papers, 32-dimensional embeddings):
#model = EmbeddingDotProductModel(10_000, 1_000, 32).to(device)
#optim = torch.optim.Adam(model.parameters(), lr=1e-3)

# Active model: SVDpp with its constructor defaults, optimised with Adam + weight decay.
# NOTE(review): a later cell runs `import torch.optim as optim`, which rebinds the
# name `optim` to the module; re-run this cell before re-running the training loop.
# The trailing comments below record validation results of earlier runs.
model = SVDpp().to(device)
optim = torch.optim.Adam(model.parameters(), lr=6e-4, weight_decay=3e-5) # global on = 
# optim = torch.optim.Adam(model.parameters(), lr=5e-4, weight_decay=3e-5) # global on = 0856
# optim = torch.optim.Adam(model.parameters(), lr=6e-4, weight_decay=3e-5) + global off = 0.867
#optim = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=1e-5)


In [None]:
def get_dataset(df: pd.DataFrame) -> torch.utils.data.Dataset:
    """Wrap the ``sid``, ``pid`` and ``rating`` columns of *df* in a TensorDataset.

    The id tensors keep the dtype of their columns; ratings are cast to float32.
    Each dataset item is a ``(sid, pid, rating)`` triple.
    """
    scientist_ids, paper_ids, scores = (
        torch.from_numpy(df[column].to_numpy())
        for column in ("sid", "pid", "rating")
    )
    return torch.utils.data.TensorDataset(scientist_ids, paper_ids, scores.float())

In [None]:
# Tensor datasets for both splits. Only the training loader shuffles; the
# validation loader keeps the row order of valid_df.
train_dataset = get_dataset(train_df)
valid_dataset = get_dataset(valid_df)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=64, shuffle=True)
valid_loader = torch.utils.data.DataLoader(valid_dataset, batch_size=64, shuffle=False)

Training loop, which we run for at most 20 epochs, stopping early once the validation RMSE has not improved for 2 consecutive epochs.

In [None]:
import copy

# Training with early stopping: at most NUM_EPOCHS epochs, stopping once the
# validation RMSE has failed to improve for `patience` consecutive epochs.
NUM_EPOCHS = 20
best_rmse = float("inf")
patience = 2
epochs_no_improve = 0
best_model_state = None

for epoch in range(NUM_EPOCHS):
    # Train model for an epoch
    total_loss = 0.0
    total_data = 0
    model.train()
    for sid, pid, ratings in train_loader:
        sid = sid.to(device)
        pid = pid.to(device)
        ratings = ratings.to(device)

        pred = model(sid, pid)
        loss = F.mse_loss(pred, ratings)

        optim.zero_grad()
        loss.backward()
        optim.step()

        # Weight the batch-mean loss by the batch size so the epoch average is
        # exact even when the last batch is smaller.
        total_data += len(sid)
        total_loss += len(sid) * loss.item()

    # Evaluate on validation set. No gradients are needed here, so autograd
    # bookkeeping is switched off for the whole pass.
    total_val_mse = 0.0
    total_val_data = 0
    model.eval()
    with torch.no_grad():
        for sid, pid, ratings in valid_loader:
            sid = sid.to(device)
            pid = pid.to(device)
            ratings = ratings.to(device)

            # Clamp predictions to the 1-5 range before scoring.
            pred = model(sid, pid).clamp(1, 5)
            mse = F.mse_loss(pred, ratings)

            total_val_data += len(sid)
            total_val_mse += len(sid) * mse.item()

    val_rmse = (total_val_mse / total_val_data) ** 0.5
    train_loss = total_loss / total_data
    print(f"[Epoch {epoch+1}] Train loss={train_loss:.3f}, Valid RMSE={val_rmse:.3f}")

    # Early stopping check: snapshot the weights whenever the RMSE improves.
    if val_rmse < best_rmse:
        best_rmse = val_rmse
        best_model_state = copy.deepcopy(model.state_dict())
        epochs_no_improve = 0
    else:
        epochs_no_improve += 1

    if epochs_no_improve >= patience:
        print(f"Stopped early at epoch {epoch+1}. Best RMSE: {best_rmse:.4f}")
        break

# load best model back; no snapshot exists if no epoch ever improved on the
# initial infinity (e.g. the validation RMSE was NaN throughout), in which case
# the current weights are kept.
if best_model_state is not None:
    model.load_state_dict(best_model_state)


In [None]:
def pred_fn(sids: np.ndarray, pids: np.ndarray) -> np.ndarray:
    """Predict ratings, clamped to [1, 5], for arrays of scientist and paper ids.

    Gradient tracking is disabled inside the function, so it can be called
    without an enclosing ``torch.no_grad()`` block; converting the model output
    to NumPy would otherwise fail on a tensor that requires grad.
    """
    with torch.no_grad():
        sid_tensor = torch.from_numpy(sids).to(device)
        pid_tensor = torch.from_numpy(pids).to(device)
        return model(sid_tensor, pid_tensor).clamp(1, 5).cpu().numpy()


# Evaluate on validation data
val_score = evaluate(valid_df, pred_fn)

print(f"Validation RMSE: {val_score:.3f}")

In [None]:
# Write predictions for the pairs in sample_submission.csv; gradient tracking
# is disabled while predicting.
with torch.no_grad():
    make_submission(pred_fn, "learned_embedding_submission.csv")

## Outlook

To further improve the score, students can make use of the information in `train_tbr.csv`, which contains the papers that scientists want to read. Furthermore, students can look into more modern collaborative filtering methods and techniques.

Have fun :)

## Bayesian Factorization Machines (BFM)

## BFM (explicit as implicit + implicit dataset)

In [None]:
from collections import defaultdict
import numpy as np
from sklearn.preprocessing import OneHotEncoder
from sklearn import metrics
import myfm
from myfm import RelationBlock
from scipy import sparse as sps


FEATURE_COLUMNS = ['sid', 'pid']

# One-hot encode the (sid, pid) pairs; ids unseen during fit become all-zero rows.
ohe = OneHotEncoder(handle_unknown='ignore')
X_train = ohe.fit_transform(train_df[FEATURE_COLUMNS])
X_test = ohe.transform(valid_df[FEATURE_COLUMNS])

# index "0" is reserved for unknown ids.
scientist_to_index = defaultdict(
    lambda: 0,
    {sid: position for position, sid in enumerate(np.unique(train_df.sid), start=1)},
)
paper_to_index = defaultdict(
    lambda: 0,
    {pid: position for position, pid in enumerate(np.unique(train_df.pid), start=1)},
)
SCIENTIST_ID_SIZE = len(scientist_to_index) + 1
PAPER_ID_SIZE = len(paper_to_index) + 1

# Interaction lists in both directions, filled first from the explicit ratings
# and then from the wishlist entries.
paper_vs_read = {}
scientist_vs_read = {}
for interactions in (train_df, implicit_df):
    for sid, pid in zip(interactions.sid, interactions.pid):
        paper_vs_read.setdefault(pid, []).append(sid)
        scientist_vs_read.setdefault(sid, []).append(pid)
    
    
    
def augment_scientist_id(scientist_ids):
    """Build the per-scientist feature matrix used by the relation block.

    Each row is the concatenation of
      * a one-hot indicator of the scientist (SCIENTIST_ID_SIZE columns), and
      * an indicator over the papers listed for that scientist in
        scientist_vs_read (PAPER_ID_SIZE columns), each entry scaled by
        1 / sqrt(number of listed papers).

    Returns a CSR matrix with one row per entry of scientist_ids.
    """
    n_rows = len(scientist_ids)
    identity_block = sps.lil_matrix((n_rows, SCIENTIST_ID_SIZE))
    history_block = sps.lil_matrix((n_rows, PAPER_ID_SIZE))

    for row, scientist_id in enumerate(scientist_ids):
        identity_block[row, scientist_to_index[scientist_id]] = 1

        papers = scientist_vs_read.get(scientist_id, [])
        # max(..., 1) avoids dividing by zero for scientists with no papers.
        weight = 1 / max(len(papers), 1) ** 0.5
        for paper_id in papers:
            # Plain assignment: a paper listed twice still gets `weight` once.
            history_block[row, paper_to_index[paper_id]] = weight

    return sps.hstack([identity_block, history_block], format='csr')
            
            
def augment_paper_id(paper_ids):
    """Build the per-paper feature matrix used by the relation block.

    Each row is the concatenation of
      * a one-hot indicator of the paper (PAPER_ID_SIZE columns), and
      * an indicator over the scientists listed for that paper in
        paper_vs_read (SCIENTIST_ID_SIZE columns), each entry scaled by
        1 / sqrt(number of listed scientists).

    Returns a CSR matrix with one row per entry of paper_ids.
    """
    n_rows = len(paper_ids)
    identity_block = sps.lil_matrix((n_rows, PAPER_ID_SIZE))
    audience_block = sps.lil_matrix((n_rows, SCIENTIST_ID_SIZE))

    for row, paper_id in enumerate(paper_ids):
        identity_block[row, paper_to_index[paper_id]] = 1

        readers = paper_vs_read.get(paper_id, [])
        # max(..., 1) avoids dividing by zero for papers nobody is listed for.
        weight = 1 / max(len(readers), 1) ** 0.5
        for scientist_id in readers:
            # Plain assignment: a scientist listed twice still gets `weight` once.
            audience_block[row, scientist_to_index[scientist_id]] = weight

    return sps.hstack([identity_block, audience_block], format='csr')



# Deduplicate the ids: each *_unique array holds every id once, and the matching
# *_index array maps each row of the frame to its position in that unique array.
train_sid_unique, train_sid_index = np.unique(train_df.sid, return_inverse=True)
train_pid_unique, train_pid_index = np.unique(train_df.pid, return_inverse=True)
# Augmented features are computed once per distinct id rather than once per row.
scientist_data_train = augment_scientist_id(train_sid_unique)
paper_data_train = augment_paper_id(train_pid_unique)

# Same construction for the validation split.
test_sid_unique, test_sid_index = np.unique(valid_df.sid, return_inverse=True)
test_pid_unique, test_pid_index = np.unique(valid_df.pid, return_inverse=True)
scientist_data_test = augment_scientist_id(test_sid_unique)
paper_data_test = augment_paper_id(test_pid_unique)


# A relation block pairs the per-row index with the per-unique-id feature matrix.
block_scientist_train = RelationBlock(train_sid_index, scientist_data_train)
block_paper_train = RelationBlock(train_pid_index, paper_data_train)
block_scientist_test = RelationBlock(test_sid_index, scientist_data_test)
block_paper_test = RelationBlock(test_pid_index, paper_data_test)


# NOTE(review): X_train already one-hot encodes sid/pid, and the relation blocks
# contain id indicator columns as well - confirm that this duplication is intended.
fm_rb = myfm.MyFMRegressor(rank=32).fit(
    X_train,
    y = train_df.rating,
    X_rel=[block_scientist_train, block_paper_train],
    n_iter=300, n_kept_samples=300
)


def pred_fn(sids: np.ndarray, pids: np.ndarray) -> np.ndarray:
    """Predict ratings for (sid, pid) pairs with the relation-block factorization machine.

    Args:
        sids: Scientist ids, one per pair.
        pids: Paper ids, aligned with *sids*.

    Returns:
        One predicted rating per pair, clipped to the valid rating range [1, 5].
    """
    # Create a DataFrame to handle the data consistently
    pred_df = pd.DataFrame({'sid': sids, 'pid': pids})

    # Transform the direct features
    X = ohe.transform(pred_df[FEATURE_COLUMNS])

    # Process unique scientist and paper IDs for relational blocks: features are
    # built once per distinct id and mapped back to the rows via the inverse index.
    pred_sid_unique, pred_sid_index = np.unique(pred_df.sid, return_inverse=True)
    pred_pid_unique, pred_pid_index = np.unique(pred_df.pid, return_inverse=True)

    # Generate augmented data for scientists and papers
    scientist_data_pred = augment_scientist_id(pred_sid_unique)
    paper_data_pred = augment_paper_id(pred_pid_unique)

    # Create the relational blocks
    block_scientist_pred = RelationBlock(pred_sid_index, scientist_data_pred)
    block_paper_pred = RelationBlock(pred_pid_index, paper_data_pred)

    # Make the prediction with relational blocks
    predictions = fm_rb.predict(X, X_rel=[block_scientist_pred, block_paper_pred])

    # Ratings lie in 1-5, so clipping can only bring a prediction closer to the
    # true rating; this matches the clamping applied to the embedding model.
    return np.clip(predictions, 1, 5)


# Validation RMSE of the relation-block factorization machine.
print(evaluate(valid_df, pred_fn))

In [None]:
import numpy as np
from sklearn.preprocessing import MultiLabelBinarizer, OneHotEncoder
from sklearn import metrics
import myfm

FEATURE_COLUMNS = ['sid', 'pid']

# One-hot features and integer rating targets for both splits.
ohe = OneHotEncoder(handle_unknown='ignore')
X_train = ohe.fit_transform(train_df[FEATURE_COLUMNS])
X_test = ohe.transform(valid_df[FEATURE_COLUMNS])
y_train = train_df.rating.values
y_test = valid_df.rating.values

# Number of one-hot columns per feature group (scientists, papers).
group_shapes = [len(group) for group in ohe.categories_]
print(group_shapes)

# Ordered-probit factorization machine; the targets are shifted from 1..5 to
# 0..4 before fitting.
fm = myfm.MyFMOrderedProbit(rank=32, random_seed=42)
fm.fit(
    X_train,
    y_train - 1,
    n_iter=300,
    n_kept_samples=300,
    group_shapes=group_shapes,
)

# Turn the class probabilities into an expected rating on the 1..5 scale.
p_ordinal = fm.predict_proba(X_test)
expected_rating = p_ordinal.dot(np.arange(1, 6))

residuals = y_test - expected_rating
rmse = (residuals ** 2).mean() ** .5
mae = np.abs(residuals).mean()
print(f'rmse={rmse}, mae={mae}')

In [None]:
# NOTE(review): pred_fn is whichever function was bound to that name last. When
# the cells run top to bottom this is the fm_rb-based predictor, not the
# ordered-probit model `fm` fitted just above - confirm this is the intended model.
make_submission(pred_fn, "learned_myfm_submission.csv")

In [None]:
import torch
from torch.utils.data import Dataset, DataLoader
import numpy as np
import torch.nn as nn
import torch.optim as optim
import copy

In [None]:
class MFWithImplicitContrastiveLoss(nn.Module):
    """Plain dot-product matrix factorisation with an auxiliary ranking loss.

    ``forward`` returns the dot product of user and item embeddings as the
    predicted rating (no bias terms). ``contrastive_loss`` is a margin-based
    hinge loss that pushes the score of a positive item above the scores of
    sampled negative items.
    """

    def __init__(self, num_users, num_items, embedding_dim):
        super().__init__()
        self.user_embeddings = nn.Embedding(num_users, embedding_dim)
        self.item_embeddings = nn.Embedding(num_items, embedding_dim)

    def forward(self, user, item):
        """Return the predicted rating for each (user[i], item[i]) pair."""
        user_emb = self.user_embeddings(user)
        item_emb = self.item_embeddings(item)
        return (user_emb * item_emb).sum(dim=1)  # Predicted rating

    def contrastive_loss(self, user, pos_item, neg_item, margin: float = 1.0):
        """Mean hinge loss ``max(0, margin - pos_score + neg_score)``.

        Args:
            user: User ids, shape (batch_size,).
            pos_item: One positive item id per user, shape (batch_size,).
            neg_item: Negative item ids, shape (batch_size, num_negatives). A
                1-D tensor of shape (batch_size,) is treated as one negative
                per user.
            margin: Required gap between the positive and negative scores.

        Returns:
            Scalar tensor: the loss averaged over all users and negatives.
        """
        # Without this, a 1-D neg_item would broadcast every user against every
        # negative in the batch instead of pairing them up row by row.
        if neg_item.dim() == 1:
            neg_item = neg_item.unsqueeze(1)

        user_emb = self.user_embeddings(user)
        pos_item_emb = self.item_embeddings(pos_item)
        neg_item_emb = self.item_embeddings(neg_item)

        # Calculate positive similarity
        pos_sim = (user_emb * pos_item_emb).sum(dim=1)  # (batch_size)

        # Calculate negative similarities for each negative sample
        neg_sim = (user_emb.unsqueeze(1) * neg_item_emb).sum(dim=2)  # (batch_size, num_negatives)

        # Calculate contrastive loss (margin-based)
        loss = torch.clamp(margin - pos_sim.unsqueeze(1) + neg_sim, min=0)  # (batch_size, num_negatives)

        return loss.mean()  # Mean across all samples and negative items


In [None]:
class CollaborativeContrastiveDataset(Dataset):
    """One item per explicit rating, enriched with wishlist and negative samples.

    Each item is ``(scientist, rated_paper, wishlist_paper, negatives, rating)``:
      * ``wishlist_paper`` is drawn uniformly from the scientist's rows in the
        implicit frame, falling back to ``rated_paper`` when there are none;
      * ``negatives`` are paper ids drawn uniformly from ``range(num_papers)``
        that the scientist has neither rated nor wishlisted.

    NOTE(review): ``num_papers`` is the number of distinct pids in *df*, so the
    negative sampling presumes paper ids are exactly 0..num_papers-1 - confirm.
    Sampling uses NumPy's global random state.
    """

    def __init__(self, df, implicit_df, negative_samples=1):
        """
        Args:
            df: DataFrame with explicit ratings (columns sid, pid, rating)
            implicit_df: DataFrame with implicit feedback (columns sid, pid)
            negative_samples: Number of negative samples per user-item pair
        """
        self.ratings = df.reset_index(drop=True)
        self.implicit = implicit_df.reset_index(drop=True)
        self.num_papers = df['pid'].nunique()
        self.negative_samples = negative_samples

        # Combine explicit and implicit interactions for negative sampling
        self.scientist_interactions = (
            pd.concat([self.ratings[['sid', 'pid']], self.implicit], ignore_index=True)
            .groupby('sid')['pid']
            .apply(set)
            .to_dict()
        )

        # Wishlist papers per scientist, grouped once here so __getitem__ does
        # not have to scan the whole implicit frame for every sample.
        self._wishlist_by_scientist = {
            sid: pids.to_numpy()
            for sid, pids in self.implicit.groupby('sid')['pid']
        }

        # Column arrays for cheap positional access in __getitem__.
        self._sids = self.ratings['sid'].to_numpy()
        self._pids = self.ratings['pid'].to_numpy()
        self._ratings = self.ratings['rating'].to_numpy()

    def __len__(self):
        return len(self.ratings)  # One entry per explicit rating

    def _sample_negatives(self, scientist):
        """Draw ``negative_samples`` paper ids the scientist has not interacted with."""
        interacted = self.scientist_interactions.get(scientist, set())

        # Rejection sampling only terminates if some id in range(num_papers) is
        # unseen. The cheap length test rules that problem out for almost every
        # scientist; the exhaustive check runs only when it cannot.
        if (
            self.negative_samples > 0
            and len(interacted) >= self.num_papers
            and all(paper in interacted for paper in range(self.num_papers))
        ):
            raise ValueError(
                f"scientist {scientist} has interacted with every candidate paper; "
                "no negative sample can be drawn"
            )

        negatives = []
        while len(negatives) < self.negative_samples:
            neg_paper = np.random.randint(self.num_papers)
            if neg_paper not in interacted:
                negatives.append(neg_paper)
        return negatives

    def __getitem__(self, idx):
        scientist = int(self._sids[idx])
        pos_paper = int(self._pids[idx])
        rating = float(self._ratings[idx])

        # Positive pair from implicit feedback: sample one
        wishlist = self._wishlist_by_scientist.get(scientist)
        if wishlist is not None:
            pos_implicit_paper = int(wishlist[np.random.randint(len(wishlist))])
        else:
            pos_implicit_paper = pos_paper  # fallback to same as rating

        negatives = self._sample_negatives(scientist)

        return (
            torch.tensor(scientist, dtype=torch.long),
            torch.tensor(pos_paper, dtype=torch.long),
            torch.tensor(pos_implicit_paper, dtype=torch.long),
            torch.tensor(negatives, dtype=torch.long),
            torch.tensor(rating, dtype=torch.float32)
        )

In [None]:
class RatingsOnlyDataset(Dataset):
    """Dataset that yields ``(sid, pid, rating)`` tensors, one per row of a ratings frame."""

    def __init__(self, ratings_df):
        # A fresh 0..n-1 index so positional lookups line up with len().
        self.ratings = ratings_df.reset_index(drop=True)

    def __len__(self):
        return len(self.ratings)

    def __getitem__(self, idx):
        record = self.ratings.iloc[idx]
        scientist, paper = (int(record[column]) for column in ('sid', 'pid'))
        score = float(record['rating'])

        # Ids are long tensors, the rating is float32.
        return (
            torch.tensor(scientist, dtype=torch.long),
            torch.tensor(paper, dtype=torch.long),
            torch.tensor(score, dtype=torch.float32),
        )

In [None]:
# The training set yields rating + wishlist + negative samples per item; the
# validation set yields plain (sid, pid, rating) triples.
train_dataset = CollaborativeContrastiveDataset(train_df, implicit_df)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
valid_dataset = RatingsOnlyDataset(valid_df)
valid_loader = DataLoader(valid_dataset, batch_size=64, shuffle=False)

# Initialize model
model = MFWithImplicitContrastiveLoss(10000, 10000, 32).to(device)

# Define loss and optimizer
optimizer = optim.Adam(model.parameters(), lr=6e-4, weight_decay=3e-5)
criterion = nn.MSELoss()

# Early stopping: at most NUM_EPOCHS epochs, stopping once the validation RMSE
# has failed to improve for `patience` consecutive epochs.
NUM_EPOCHS = 20
best_rmse = float("inf")
patience = 2
epochs_no_improve = 0
best_model_state = None

for epoch in range(NUM_EPOCHS):
    # Train model for an epoch
    total_loss = 0.0
    total_data = 0
    model.train()
    for batch in train_loader:
        sid, pos_pid, pos_implicit_paper, neg_papers, rating = batch
        sid = sid.to(device)
        pos_pid = pos_pid.to(device)
        pos_implicit_paper = pos_implicit_paper.to(device)
        neg_papers = neg_papers.to(device)
        rating = rating.to(device)

        predicted_rating = model(sid, pos_pid)

        # Rating regression on the explicit pair, ranking loss on the wishlist
        # pair versus the sampled negatives.
        mse_loss = F.mse_loss(predicted_rating, rating)
        loss_contrastive = model.contrastive_loss(sid, pos_implicit_paper, neg_papers)

        loss = 0.7 * mse_loss + 0.3 * loss_contrastive

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_data += len(sid)
        total_loss += len(sid) * loss.item()

    # Evaluate on validation set
    total_val_mse = 0.0
    total_val_data = 0
    model.eval()
    with torch.no_grad():
        for sid, pid, rating in valid_loader:
            sid = sid.to(device)
            pid = pid.to(device)
            rating = rating.to(device)

            # Forward pass: predict rating, clamped to the valid 1-5 range so
            # the validation score is computed the same way as for the
            # embedding model's training loop.
            predicted_rating = model(sid, pid).clamp(1, 5)

            # Summed squared error, so the RMSE below is exact over all rows.
            mse_loss = F.mse_loss(predicted_rating, rating, reduction='sum')

            total_val_mse += mse_loss.item()
            total_val_data += len(sid)

    val_rmse = (total_val_mse / total_val_data) ** 0.5
    train_loss = total_loss / total_data
    print(f"[Epoch {epoch+1}] Train loss={train_loss:.3f}, Valid RMSE={val_rmse:.3f}")

    # Early stopping check: snapshot the weights whenever the RMSE improves.
    if val_rmse < best_rmse:
        best_rmse = val_rmse
        best_model_state = copy.deepcopy(model.state_dict())
        epochs_no_improve = 0
    else:
        epochs_no_improve += 1

    if epochs_no_improve >= patience:
        print(f"Stopped early at epoch {epoch+1}. Best RMSE: {best_rmse:.4f}")
        break

# load best model back; no snapshot exists if no epoch ever improved on the
# initial infinity (e.g. the validation RMSE was NaN throughout), in which case
# the current weights are kept.
if best_model_state is not None:
    model.load_state_dict(best_model_state)

In [None]:
# from recommenders.models.ncf.ncf_singlenode import NCF
# from recommenders.models.ncf.dataset import Dataset as NCFDataset


# train_file = DATA_DIR + "/train.csv"
# test_file = DATA_DIR + "/test.csv"
# train_df.to_csv(train_file, index=False)
# valid_df.to_csv(test_file, index=False)


# train_dataset = NCFDataset(train_file, col_user='sid', col_item='pid', col_rating='rating')
# valid_dataset = NCFDataset(test_file, col_user='sid', col_item='pid', col_rating='rating')


# ncf_model = NCF(
#     n_users=train_df['sid'].nunique(),
#     n_items=train_df['pid'].nunique(),
#     model_type="NeuMF",  # Other options: GMF, MLP
#     n_factors=32,
#     layer_sizes=[16, 8],
#     learning_rate=0.001,
#     batch_size=64,
#     n_epochs=10,
#     verbose=1
# )

# # Fit the model
# ncf_model.fit(train_df)

# user_item_pairs = valid_df[['userID', 'itemID']]
# predictions = ncf_model.predict(user_item_pairs)


# rmse_score = rmse(valid_df['rating'], predictions)
# print(f"RMSE: {rmse_score}")