In [1]:
from typing import Tuple, Callable

import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import root_mean_squared_error
import os

In [2]:
# Single seed shared by the PyTorch and NumPy global random number generators,
# so that anything drawing from those generators is repeatable across runs.
SEED = 42
torch.manual_seed(SEED)
np.random.seed(SEED)

In [3]:
# Directory holding the CSV files read below. It is joined with each file name via
# os.path.join, so the empty string means "relative to the current working directory".
DATA_DIR = ""


def read_data_df(test_size: float = 0.01) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """Reads the ratings CSV and splits it into training and validation frames.

    The file "train_ratings.csv" is read from DATA_DIR. Its "sid_pid" column
    (formatted "<sid>_<pid>") is replaced by two integer columns, "sid" and "pid".

    Inputs:
        test_size: Fraction of rows held out for validation. The default of 0.01
            gives a 99/1 train/validation split.

    Outputs: (train_df, valid_df)
    """

    df = pd.read_csv(os.path.join(DATA_DIR, "train_ratings.csv"))

    # Split sid_pid into sid and pid columns
    df[["sid", "pid"]] = df["sid_pid"].str.split("_", expand=True)
    df = df.drop("sid_pid", axis=1)
    df["sid"] = df["sid"].astype(int)
    df["pid"] = df["pid"].astype(int)

    # Pin the split to SEED instead of relying on whatever state the global NumPy
    # generator happens to be in: re-running this function must not move rows
    # between the training and validation sets.
    train_df, valid_df = train_test_split(df, test_size=test_size, random_state=SEED)
    return train_df, valid_df


def read_data_matrix(df: pd.DataFrame, num_scientists: "int | None" = None,
                     num_papers: "int | None" = None) -> np.ndarray:
    """Returns a matrix view of the ratings, where rows are scientists (sid) and
    columns are papers (pid). Pairs without a rating are NaN.

    Inputs:
        df: Frame with "sid", "pid" and "rating" columns; each (sid, pid) pair may
            appear at most once (DataFrame.pivot raises on duplicates).
        num_scientists: If given, the result has exactly this many rows, one per
            sid in 0..num_scientists-1, so that row i always belongs to sid i.
            Ids absent from df become all-NaN rows; ids outside the range are dropped.
        num_papers: Same as num_scientists, for the pid columns.

    Outputs: 2-D array. With both sizes left as None only the ids present in df
        get a row/column, in sorted id order.
    """

    mat = df.pivot(index="sid", columns="pid", values="rating")

    # Without padding, a missing id shifts every later row/column by one, which
    # breaks any code that indexes the matrix directly with raw sid/pid values.
    if num_scientists is not None:
        mat = mat.reindex(index=range(num_scientists))
    if num_papers is not None:
        mat = mat.reindex(columns=range(num_papers))
    return mat.values


def evaluate(valid_df: pd.DataFrame, pred_fn: Callable[[np.ndarray, np.ndarray], np.ndarray]) -> float:
    """Scores a prediction function on held-out ratings.

    Inputs:
        valid_df: Validation frame with "sid", "pid" and "rating" columns
            (e.g. the second frame returned by read_data_df).
        pred_fn: Maps an array of sids and an array of pids (same length, aligned
            element-wise) to an array of predicted ratings.

    Outputs: Root-mean-squared error between the true and the predicted ratings.
    """

    scientist_ids = valid_df["sid"].values
    paper_ids = valid_df["pid"].values
    predictions = pred_fn(scientist_ids, paper_ids)

    targets = valid_df["rating"].values
    return root_mean_squared_error(targets, predictions)


def make_submission(pred_fn: Callable[[np.ndarray, np.ndarray], np.ndarray], filename: os.PathLike):
    """Fills in the sample submission with predictions and writes it out as CSV.

    The template "sample_submission.csv" is read from DATA_DIR; its "rating"
    column is overwritten and every other column is written back unchanged.

    Inputs:
        pred_fn: Maps an array of sids and an array of pids to predicted ratings.
        filename: Destination path of the submission CSV (written without the index).
    """

    submission = pd.read_csv(os.path.join(DATA_DIR, "sample_submission.csv"))

    # Keys look like "<sid>_<pid>": after the split, column 0 is the sid and column 1 the pid
    id_parts = submission["sid_pid"].str.split("_", expand=True)
    scientist_ids = id_parts[0].astype(int).values
    paper_ids = id_parts[1].astype(int).values

    submission["rating"] = pred_fn(scientist_ids, paper_ids)
    submission.to_csv(filename, index=False)

In [4]:
# Load the ratings once; every later cell works from these two frames.
train_df, valid_df = read_data_df()

In [5]:
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using: {device}")

Using: cpu


In [6]:
def get_dataset(df: pd.DataFrame) -> torch.utils.data.Dataset:
    """Wraps the "sid", "pid" and "rating" columns of a frame as a torch dataset.

    Each item of the returned TensorDataset is a (sid, pid, rating) triple.
    Ratings are cast to float32; the id columns keep the dtype they have in df.
    """

    def column_tensor(name: str) -> torch.Tensor:
        # One tensor per column, built from the column's NumPy array
        return torch.from_numpy(df[name].to_numpy())

    return torch.utils.data.TensorDataset(
        column_tensor("sid"),
        column_tensor("pid"),
        column_tensor("rating").float(),
    )

In [7]:
# NOTE(review): this repeats the read_data_matrix definition from the helper cell
# near the top of the notebook; being the later one, it is the definition in
# effect from here on. Keep the two in sync, or drop one of them.
def read_data_matrix(df: pd.DataFrame, num_scientists: "int | None" = None,
                     num_papers: "int | None" = None) -> np.ndarray:
    """Returns a matrix view of the ratings, where rows are scientists (sid) and
    columns are papers (pid). Pairs without a rating are NaN.

    Inputs:
        df: Frame with "sid", "pid" and "rating" columns; each (sid, pid) pair may
            appear at most once (DataFrame.pivot raises on duplicates).
        num_scientists: If given, the result has exactly this many rows, one per
            sid in 0..num_scientists-1, so that row i always belongs to sid i.
            Ids absent from df become all-NaN rows; ids outside the range are dropped.
        num_papers: Same as num_scientists, for the pid columns.

    Outputs: 2-D array. With both sizes left as None only the ids present in df
        get a row/column, in sorted id order.
    """

    mat = df.pivot(index="sid", columns="pid", values="rating")

    # Without padding, a missing id shifts every later row/column by one, which
    # breaks any code that indexes the matrix directly with raw sid/pid values.
    if num_scientists is not None:
        mat = mat.reindex(index=range(num_scientists))
    if num_papers is not None:
        mat = mat.reindex(columns=range(num_papers))
    return mat.values

def impute_values(mat: np.ndarray) -> np.ndarray:
    """Returns a copy of mat in which every NaN entry is replaced by 0.0.

    The replacement is done by np.nan_to_num, which leaves the input untouched
    and additionally maps +/-inf to very large finite numbers.
    """
    filled = np.nan_to_num(mat, nan=0.0)
    return filled

In [8]:
# Training ratings as a scientist-by-paper matrix; pairs that have no training
# rating are filled with 0 so the matrix can be used as a dense network input.
Y = impute_values(read_data_matrix(train_df))

In [9]:
def read_data_tbr() -> pd.DataFrame:
    """Loads the wishlist table "train_tbr.csv" from DATA_DIR.

    The frame is returned exactly as parsed by pandas; the cells that consume it
    rely on it having "sid" and "pid" columns.
    """

    tbr_path = os.path.join(DATA_DIR, "train_tbr.csv")
    return pd.read_csv(tbr_path)

In [10]:
wishlist_df = read_data_tbr()
# Implicit feedback: 1 marks a (sid, pid) pair that is on the wishlist
wishlist_df["rating"] = 1

# The pivot below only creates rows for sids that occur in the frame, so a
# scientist with an empty wishlist would shift every later row. Give each such
# scientist one placeholder entry (paper 0, value 0), which pivots to an all-zero row.
# A single set difference and a single concat replace the former per-sid frame
# scans and row-by-row concatenation; the resulting frame is the same.
missing_sids = np.setdiff1d(np.arange(10000), wishlist_df["sid"].to_numpy()).tolist()
if missing_sids:
    placeholder_rows = pd.DataFrame({"sid": missing_sids, "pid": 0, "rating": 0})
    wishlist_df = pd.concat([wishlist_df, placeholder_rows], ignore_index=True)

# NOTE(review): only scientists are padded. If some pid never appears in the
# wishlist data the matrix will have fewer columns than papers — TODO confirm
# that every paper id occurs at least once.
wishlist = read_data_matrix(wishlist_df)
wishlist = impute_values(wishlist)

In [11]:
class DeepMatrixFactorizationModel(nn.Module):
    """Rating predictor built from four small MLP "towers" and an MLP head.

    Two fixed matrices of shape [num_scientists, num_papers] are stored as
    buffers: Y and rating. For a (sid, pid) pair the model encodes

      * row sid of Y          with scientist_nn (input dropout 0.5),
      * column pid of Y       with paper_nn     (input dropout 0.5),
      * row sid of rating     with srating_nn,
      * column pid of rating  with prating_nn,

    concatenates the four dim-sized encodings and maps them to one scalar
    with final_nn.

    NOTE(review): if Y is built from the same ratings the model is trained on,
    the Y row/column fed to the towers contains the very entry being predicted
    (only partially hidden by the input dropout) — worth confirming this is intended.
    """

    def __init__(self, num_scientists: int, num_papers: int, dim: int, hidden_dim: int, Y: np.ndarray, rating: np.ndarray):
        """
        Inputs:
            num_scientists: Number of rows of Y / rating.
            num_papers: Number of columns of Y / rating.
            dim: Output width of every tower (the head therefore sees 4*dim features).
            hidden_dim: Hidden width of every tower.
            Y: [num_scientists, num_papers] array, stored as a float32 buffer.
            rating: [num_scientists, num_papers] array, stored as a float32 buffer.
        """
        super().__init__()

        # Buffers travel with the module (.to(device), state dict) but are never trained
        for buffer_name, array in (("Y", Y), ("rating", rating)):
            self.register_buffer(buffer_name, torch.from_numpy(array).float())

        # Towers are created in this fixed order so that seeded weight
        # initialisation draws random numbers in the same sequence every time
        self.scientist_nn = self._tower(num_papers, hidden_dim, dim, input_dropout=0.5)
        self.paper_nn = self._tower(num_scientists, hidden_dim, dim, input_dropout=0.5)
        self.srating_nn = self._tower(num_papers, hidden_dim, dim)
        self.prating_nn = self._tower(num_scientists, hidden_dim, dim)

        self.final_nn = nn.Sequential(
            nn.Linear(4 * dim, dim),
            nn.ReLU(),
            nn.Linear(dim, 1),
        )

    @staticmethod
    def _tower(in_features: int, hidden_dim: int, dim: int, input_dropout: float = 0.0) -> nn.Sequential:
        """Builds [Dropout ->] Linear -> ReLU -> Linear -> ReLU; dropout only if input_dropout > 0."""
        layers = [nn.Dropout(input_dropout)] if input_dropout > 0 else []
        layers += [
            nn.Linear(in_features, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, dim),
            nn.ReLU(),
        ]
        return nn.Sequential(*layers)

    def forward(self, sid: torch.Tensor, pid: torch.Tensor) -> torch.Tensor:
        """
        Inputs:
            sid: [B,], int
            pid: [B,], int

        Outputs: [B,], float
        """
        # Rows are selected directly; columns come out as [num_scientists, B]
        # and are transposed so that the batch is the leading dimension.
        scientist_vec = self.scientist_nn(self.Y[sid, :])
        paper_vec = self.paper_nn(self.Y[:, pid].T)
        scientist_rating_vec = self.srating_nn(self.rating[sid, :])
        paper_rating_vec = self.prating_nn(self.rating[:, pid].T)

        # Concatenate the four encodings ([B, 4*dim]) and let the head produce one score per pair
        joint = torch.cat([scientist_vec, paper_vec, scientist_rating_vec, paper_rating_vec], dim=1)
        return self.final_nn(joint).squeeze(1)

In [12]:
# Define model (10k scientists, 1k papers, 40-dimensional tower outputs and hidden layers)
# on the selected device, plus an Adam optimizer over all of its parameters.
model = DeepMatrixFactorizationModel(10_000, 1_000, 40, 40, Y, wishlist).to(device)
optim = torch.optim.Adam(model.parameters(), lr=5*1e-5)

In [13]:
# Wrap both splits as tensor datasets of (sid, pid, rating) triples
train_dataset, valid_dataset = (get_dataset(split_df) for split_df in (train_df, valid_df))

# Only the training stream is reshuffled every epoch; validation keeps a fixed order
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=32, shuffle=True)
valid_loader = torch.utils.data.DataLoader(dataset=valid_dataset, batch_size=32, shuffle=False)

In [14]:
# NOTE(review): this train/validate loop is repeated verbatim in the following
# cells with only NUM_EPOCHS changed; every run continues training the same
# `model` / `optim`. A single function taking the epoch count would remove the copies.
NUM_EPOCHS = 30
for epoch in range(NUM_EPOCHS):
    # Training pass: one sweep over the shuffled training batches
    total_loss = 0.0
    total_data = 0
    model.train()
    for sid, pid, ratings in train_loader:
        sid, pid, ratings = sid.to(device), pid.to(device), ratings.to(device)

        # Forward pass and mean-squared-error loss for this batch
        pred = model(sid, pid)
        loss = F.mse_loss(pred, ratings)

        # Clear stale gradients, backpropagate, then update the weights
        optim.zero_grad()
        loss.backward()
        optim.step()

        # loss is a batch mean, so weight it by the batch size to get a per-sample epoch average
        total_data += len(sid)
        total_loss += len(sid) * loss.item()

    # Validation pass: eval mode, and no_grad so that no autograd graph is
    # built for predictions that are never backpropagated through
    total_val_mse = 0.0
    total_val_data = 0
    model.eval()
    with torch.no_grad():
        for sid, pid, ratings in valid_loader:
            sid, pid, ratings = sid.to(device), pid.to(device), ratings.to(device)

            # Clamp predictions to [1, 5], the range the ratings are expected to lie in
            pred = model(sid, pid).clamp(1, 5)
            mse = F.mse_loss(pred, ratings)

            total_val_data += len(sid)
            total_val_mse += len(sid) * mse.item()

    print(f"[Epoch {epoch+1}/{NUM_EPOCHS}] Train loss={total_loss / total_data:.3f}, Valid RMSE={(total_val_mse / total_val_data) ** 0.5:.3f}")

[Epoch 1/30] Train loss=0.928, Valid RMSE=0.919
[Epoch 2/30] Train loss=0.822, Valid RMSE=0.890
[Epoch 3/30] Train loss=0.801, Valid RMSE=0.897
[Epoch 4/30] Train loss=0.790, Valid RMSE=0.894
[Epoch 5/30] Train loss=0.782, Valid RMSE=0.884
[Epoch 6/30] Train loss=0.776, Valid RMSE=0.878
[Epoch 7/30] Train loss=0.771, Valid RMSE=0.898
[Epoch 8/30] Train loss=0.768, Valid RMSE=0.876
[Epoch 9/30] Train loss=0.764, Valid RMSE=0.874
[Epoch 10/30] Train loss=0.761, Valid RMSE=0.873
[Epoch 11/30] Train loss=0.758, Valid RMSE=0.870
[Epoch 12/30] Train loss=0.755, Valid RMSE=0.877
[Epoch 13/30] Train loss=0.752, Valid RMSE=0.869
[Epoch 14/30] Train loss=0.750, Valid RMSE=0.869
[Epoch 15/30] Train loss=0.748, Valid RMSE=0.869
[Epoch 16/30] Train loss=0.745, Valid RMSE=0.867
[Epoch 17/30] Train loss=0.744, Valid RMSE=0.866
[Epoch 18/30] Train loss=0.742, Valid RMSE=0.868
[Epoch 19/30] Train loss=0.740, Valid RMSE=0.863
[Epoch 20/30] Train loss=0.739, Valid RMSE=0.868
[Epoch 21/30] Train loss=0.73

In [15]:
# Continue training the same model for another 30 epochs
NUM_EPOCHS = 30
for epoch in range(NUM_EPOCHS):
    # Training pass: one sweep over the shuffled training batches
    total_loss = 0.0
    total_data = 0
    model.train()
    for sid, pid, ratings in train_loader:
        sid, pid, ratings = sid.to(device), pid.to(device), ratings.to(device)

        # Forward pass and mean-squared-error loss for this batch
        pred = model(sid, pid)
        loss = F.mse_loss(pred, ratings)

        # Clear stale gradients, backpropagate, then update the weights
        optim.zero_grad()
        loss.backward()
        optim.step()

        # loss is a batch mean, so weight it by the batch size to get a per-sample epoch average
        total_data += len(sid)
        total_loss += len(sid) * loss.item()

    # Validation pass: eval mode, and no_grad so that no autograd graph is
    # built for predictions that are never backpropagated through
    total_val_mse = 0.0
    total_val_data = 0
    model.eval()
    with torch.no_grad():
        for sid, pid, ratings in valid_loader:
            sid, pid, ratings = sid.to(device), pid.to(device), ratings.to(device)

            # Clamp predictions to [1, 5], the range the ratings are expected to lie in
            pred = model(sid, pid).clamp(1, 5)
            mse = F.mse_loss(pred, ratings)

            total_val_data += len(sid)
            total_val_mse += len(sid) * mse.item()

    print(f"[Epoch {epoch+1}/{NUM_EPOCHS}] Train loss={total_loss / total_data:.3f}, Valid RMSE={(total_val_mse / total_val_data) ** 0.5:.3f}")

[Epoch 1/30] Train loss=0.723, Valid RMSE=0.860
[Epoch 2/30] Train loss=0.722, Valid RMSE=0.860
[Epoch 3/30] Train loss=0.721, Valid RMSE=0.859
[Epoch 4/30] Train loss=0.720, Valid RMSE=0.861
[Epoch 5/30] Train loss=0.719, Valid RMSE=0.861
[Epoch 6/30] Train loss=0.718, Valid RMSE=0.860
[Epoch 7/30] Train loss=0.717, Valid RMSE=0.859
[Epoch 8/30] Train loss=0.716, Valid RMSE=0.862
[Epoch 9/30] Train loss=0.715, Valid RMSE=0.860
[Epoch 10/30] Train loss=0.714, Valid RMSE=0.857
[Epoch 11/30] Train loss=0.714, Valid RMSE=0.857
[Epoch 12/30] Train loss=0.713, Valid RMSE=0.859
[Epoch 13/30] Train loss=0.711, Valid RMSE=0.858
[Epoch 14/30] Train loss=0.711, Valid RMSE=0.857
[Epoch 15/30] Train loss=0.710, Valid RMSE=0.855
[Epoch 16/30] Train loss=0.709, Valid RMSE=0.855
[Epoch 17/30] Train loss=0.708, Valid RMSE=0.857
[Epoch 18/30] Train loss=0.708, Valid RMSE=0.857
[Epoch 19/30] Train loss=0.707, Valid RMSE=0.855
[Epoch 20/30] Train loss=0.706, Valid RMSE=0.856
[Epoch 21/30] Train loss=0.70

In [18]:
# Continue training the same model for another 5 epochs
NUM_EPOCHS = 5
for epoch in range(NUM_EPOCHS):
    # Training pass: one sweep over the shuffled training batches
    total_loss = 0.0
    total_data = 0
    model.train()
    for sid, pid, ratings in train_loader:
        sid, pid, ratings = sid.to(device), pid.to(device), ratings.to(device)

        # Forward pass and mean-squared-error loss for this batch
        pred = model(sid, pid)
        loss = F.mse_loss(pred, ratings)

        # Clear stale gradients, backpropagate, then update the weights
        optim.zero_grad()
        loss.backward()
        optim.step()

        # loss is a batch mean, so weight it by the batch size to get a per-sample epoch average
        total_data += len(sid)
        total_loss += len(sid) * loss.item()

    # Validation pass: eval mode, and no_grad so that no autograd graph is
    # built for predictions that are never backpropagated through
    total_val_mse = 0.0
    total_val_data = 0
    model.eval()
    with torch.no_grad():
        for sid, pid, ratings in valid_loader:
            sid, pid, ratings = sid.to(device), pid.to(device), ratings.to(device)

            # Clamp predictions to [1, 5], the range the ratings are expected to lie in
            pred = model(sid, pid).clamp(1, 5)
            mse = F.mse_loss(pred, ratings)

            total_val_data += len(sid)
            total_val_mse += len(sid) * mse.item()

    print(f"[Epoch {epoch+1}/{NUM_EPOCHS}] Train loss={total_loss / total_data:.3f}, Valid RMSE={(total_val_mse / total_val_data) ** 0.5:.3f}")

[Epoch 1/5] Train loss=0.700, Valid RMSE=0.855
[Epoch 2/5] Train loss=0.699, Valid RMSE=0.857
[Epoch 3/5] Train loss=0.698, Valid RMSE=0.859
[Epoch 4/5] Train loss=0.698, Valid RMSE=0.857
[Epoch 5/5] Train loss=0.697, Valid RMSE=0.857


In [19]:
# Continue training the same model for one more epoch
NUM_EPOCHS = 1
for epoch in range(NUM_EPOCHS):
    # Training pass: one sweep over the shuffled training batches
    total_loss = 0.0
    total_data = 0
    model.train()
    for sid, pid, ratings in train_loader:
        sid, pid, ratings = sid.to(device), pid.to(device), ratings.to(device)

        # Forward pass and mean-squared-error loss for this batch
        pred = model(sid, pid)
        loss = F.mse_loss(pred, ratings)

        # Clear stale gradients, backpropagate, then update the weights
        optim.zero_grad()
        loss.backward()
        optim.step()

        # loss is a batch mean, so weight it by the batch size to get a per-sample epoch average
        total_data += len(sid)
        total_loss += len(sid) * loss.item()

    # Validation pass: eval mode, and no_grad so that no autograd graph is
    # built for predictions that are never backpropagated through
    total_val_mse = 0.0
    total_val_data = 0
    model.eval()
    with torch.no_grad():
        for sid, pid, ratings in valid_loader:
            sid, pid, ratings = sid.to(device), pid.to(device), ratings.to(device)

            # Clamp predictions to [1, 5], the range the ratings are expected to lie in
            pred = model(sid, pid).clamp(1, 5)
            mse = F.mse_loss(pred, ratings)

            total_val_data += len(sid)
            total_val_mse += len(sid) * mse.item()

    print(f"[Epoch {epoch+1}/{NUM_EPOCHS}] Train loss={total_loss / total_data:.3f}, Valid RMSE={(total_val_mse / total_val_data) ** 0.5:.3f}")

[Epoch 1/1] Train loss=0.697, Valid RMSE=0.860


In [20]:
# Continue training the same model for one more epoch
NUM_EPOCHS = 1
for epoch in range(NUM_EPOCHS):
    # Training pass: one sweep over the shuffled training batches
    total_loss = 0.0
    total_data = 0
    model.train()
    for sid, pid, ratings in train_loader:
        sid, pid, ratings = sid.to(device), pid.to(device), ratings.to(device)

        # Forward pass and mean-squared-error loss for this batch
        pred = model(sid, pid)
        loss = F.mse_loss(pred, ratings)

        # Clear stale gradients, backpropagate, then update the weights
        optim.zero_grad()
        loss.backward()
        optim.step()

        # loss is a batch mean, so weight it by the batch size to get a per-sample epoch average
        total_data += len(sid)
        total_loss += len(sid) * loss.item()

    # Validation pass: eval mode, and no_grad so that no autograd graph is
    # built for predictions that are never backpropagated through
    total_val_mse = 0.0
    total_val_data = 0
    model.eval()
    with torch.no_grad():
        for sid, pid, ratings in valid_loader:
            sid, pid, ratings = sid.to(device), pid.to(device), ratings.to(device)

            # Clamp predictions to [1, 5], the range the ratings are expected to lie in
            pred = model(sid, pid).clamp(1, 5)
            mse = F.mse_loss(pred, ratings)

            total_val_data += len(sid)
            total_val_mse += len(sid) * mse.item()

    print(f"[Epoch {epoch+1}/{NUM_EPOCHS}] Train loss={total_loss / total_data:.3f}, Valid RMSE={(total_val_mse / total_val_data) ** 0.5:.3f}")

[Epoch 1/1] Train loss=0.696, Valid RMSE=0.854


In [23]:
# Continue training the same model for one more epoch
NUM_EPOCHS = 1
for epoch in range(NUM_EPOCHS):
    # Training pass: one sweep over the shuffled training batches
    total_loss = 0.0
    total_data = 0
    model.train()
    for sid, pid, ratings in train_loader:
        sid, pid, ratings = sid.to(device), pid.to(device), ratings.to(device)

        # Forward pass and mean-squared-error loss for this batch
        pred = model(sid, pid)
        loss = F.mse_loss(pred, ratings)

        # Clear stale gradients, backpropagate, then update the weights
        optim.zero_grad()
        loss.backward()
        optim.step()

        # loss is a batch mean, so weight it by the batch size to get a per-sample epoch average
        total_data += len(sid)
        total_loss += len(sid) * loss.item()

    # Validation pass: eval mode, and no_grad so that no autograd graph is
    # built for predictions that are never backpropagated through
    total_val_mse = 0.0
    total_val_data = 0
    model.eval()
    with torch.no_grad():
        for sid, pid, ratings in valid_loader:
            sid, pid, ratings = sid.to(device), pid.to(device), ratings.to(device)

            # Clamp predictions to [1, 5], the range the ratings are expected to lie in
            pred = model(sid, pid).clamp(1, 5)
            mse = F.mse_loss(pred, ratings)

            total_val_data += len(sid)
            total_val_mse += len(sid) * mse.item()

    print(f"[Epoch {epoch+1}/{NUM_EPOCHS}] Train loss={total_loss / total_data:.3f}, Valid RMSE={(total_val_mse / total_val_data) ** 0.5:.3f}")

[Epoch 1/1] Train loss=0.696, Valid RMSE=0.856


In [24]:
# Continue training the same model for another 10 epochs
NUM_EPOCHS = 10
for epoch in range(NUM_EPOCHS):
    # Training pass: one sweep over the shuffled training batches
    total_loss = 0.0
    total_data = 0
    model.train()
    for sid, pid, ratings in train_loader:
        sid, pid, ratings = sid.to(device), pid.to(device), ratings.to(device)

        # Forward pass and mean-squared-error loss for this batch
        pred = model(sid, pid)
        loss = F.mse_loss(pred, ratings)

        # Clear stale gradients, backpropagate, then update the weights
        optim.zero_grad()
        loss.backward()
        optim.step()

        # loss is a batch mean, so weight it by the batch size to get a per-sample epoch average
        total_data += len(sid)
        total_loss += len(sid) * loss.item()

    # Validation pass: eval mode, and no_grad so that no autograd graph is
    # built for predictions that are never backpropagated through
    total_val_mse = 0.0
    total_val_data = 0
    model.eval()
    with torch.no_grad():
        for sid, pid, ratings in valid_loader:
            sid, pid, ratings = sid.to(device), pid.to(device), ratings.to(device)

            # Clamp predictions to [1, 5], the range the ratings are expected to lie in
            pred = model(sid, pid).clamp(1, 5)
            mse = F.mse_loss(pred, ratings)

            total_val_data += len(sid)
            total_val_mse += len(sid) * mse.item()

    print(f"[Epoch {epoch+1}/{NUM_EPOCHS}] Train loss={total_loss / total_data:.3f}, Valid RMSE={(total_val_mse / total_val_data) ** 0.5:.3f}")

[Epoch 1/10] Train loss=0.695, Valid RMSE=0.855
[Epoch 2/10] Train loss=0.695, Valid RMSE=0.862
[Epoch 3/10] Train loss=0.694, Valid RMSE=0.856
[Epoch 4/10] Train loss=0.694, Valid RMSE=0.856
[Epoch 5/10] Train loss=0.693, Valid RMSE=0.856
[Epoch 6/10] Train loss=0.693, Valid RMSE=0.854
[Epoch 7/10] Train loss=0.692, Valid RMSE=0.854
[Epoch 8/10] Train loss=0.692, Valid RMSE=0.859
[Epoch 9/10] Train loss=0.692, Valid RMSE=0.858
[Epoch 10/10] Train loss=0.691, Valid RMSE=0.855


In [27]:
# Continue training the same model for another 5 epochs
NUM_EPOCHS = 5
for epoch in range(NUM_EPOCHS):
    # Training pass: one sweep over the shuffled training batches
    total_loss = 0.0
    total_data = 0
    model.train()
    for sid, pid, ratings in train_loader:
        sid, pid, ratings = sid.to(device), pid.to(device), ratings.to(device)

        # Forward pass and mean-squared-error loss for this batch
        pred = model(sid, pid)
        loss = F.mse_loss(pred, ratings)

        # Clear stale gradients, backpropagate, then update the weights
        optim.zero_grad()
        loss.backward()
        optim.step()

        # loss is a batch mean, so weight it by the batch size to get a per-sample epoch average
        total_data += len(sid)
        total_loss += len(sid) * loss.item()

    # Validation pass: eval mode, and no_grad so that no autograd graph is
    # built for predictions that are never backpropagated through
    total_val_mse = 0.0
    total_val_data = 0
    model.eval()
    with torch.no_grad():
        for sid, pid, ratings in valid_loader:
            sid, pid, ratings = sid.to(device), pid.to(device), ratings.to(device)

            # Clamp predictions to [1, 5], the range the ratings are expected to lie in
            pred = model(sid, pid).clamp(1, 5)
            mse = F.mse_loss(pred, ratings)

            total_val_data += len(sid)
            total_val_mse += len(sid) * mse.item()

    print(f"[Epoch {epoch+1}/{NUM_EPOCHS}] Train loss={total_loss / total_data:.3f}, Valid RMSE={(total_val_mse / total_val_data) ** 0.5:.3f}")

[Epoch 1/5] Train loss=0.691, Valid RMSE=0.855
[Epoch 2/5] Train loss=0.690, Valid RMSE=0.853
[Epoch 3/5] Train loss=0.690, Valid RMSE=0.855
[Epoch 4/5] Train loss=0.689, Valid RMSE=0.854
[Epoch 5/5] Train loss=0.689, Valid RMSE=0.855


In [30]:
# NOTE(review): this cell carries execution count 30 while the prediction cells
# below carry 28 and 29, and its recorded output is a KeyboardInterrupt — it
# appears to have been run after the submission was written and then aborted.
# On a top-to-bottom re-run it would train 17 more epochs *before* the
# submission file whose name says 83 epochs is produced.
NUM_EPOCHS = 17
for epoch in range(NUM_EPOCHS):
    # Training pass: one sweep over the shuffled training batches
    total_loss = 0.0
    total_data = 0
    model.train()
    for sid, pid, ratings in train_loader:
        sid, pid, ratings = sid.to(device), pid.to(device), ratings.to(device)

        # Forward pass and mean-squared-error loss for this batch
        pred = model(sid, pid)
        loss = F.mse_loss(pred, ratings)

        # Clear stale gradients, backpropagate, then update the weights
        optim.zero_grad()
        loss.backward()
        optim.step()

        # loss is a batch mean, so weight it by the batch size to get a per-sample epoch average
        total_data += len(sid)
        total_loss += len(sid) * loss.item()

    # Validation pass: eval mode, and no_grad so that no autograd graph is
    # built for predictions that are never backpropagated through
    total_val_mse = 0.0
    total_val_data = 0
    model.eval()
    with torch.no_grad():
        for sid, pid, ratings in valid_loader:
            sid, pid, ratings = sid.to(device), pid.to(device), ratings.to(device)

            # Clamp predictions to [1, 5], the range the ratings are expected to lie in
            pred = model(sid, pid).clamp(1, 5)
            mse = F.mse_loss(pred, ratings)

            total_val_data += len(sid)
            total_val_mse += len(sid) * mse.item()

    print(f"[Epoch {epoch+1}/{NUM_EPOCHS}] Train loss={total_loss / total_data:.3f}, Valid RMSE={(total_val_mse / total_val_data) ** 0.5:.3f}")

KeyboardInterrupt: 

In [28]:
def batched_pred_fn(sids, pids, batch_size=1024):
    """Predicts ratings for aligned arrays of sids and pids with the global `model`.

    Inputs:
        sids: Integer scientist ids (NumPy array or anything np.asarray accepts).
        pids: Integer paper ids, same length as sids.
        batch_size: Number of pairs sent through the model per forward pass.

    Outputs: 1-D NumPy array of predictions clamped to [1, 5], in input order.
        An empty input yields an empty float32 array.

    The function is self-contained: it switches the model to eval mode (so
    dropout is off even if a training cell was interrupted mid-epoch), runs
    under torch.no_grad() (so the outputs can be converted to NumPy without the
    caller having to disable autograd), and restores the previous
    train/eval mode afterwards.
    """
    sids = np.asarray(sids)
    pids = np.asarray(pids)
    num_samples = len(sids)
    if num_samples == 0:
        # np.concatenate rejects an empty list, so answer the empty query directly
        return np.empty(0, dtype=np.float32)

    results = []
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for start in range(0, num_samples, batch_size):
                batch_sids = torch.from_numpy(sids[start:start + batch_size]).to(device)
                batch_pids = torch.from_numpy(pids[start:start + batch_size]).to(device)

                batch_preds = model(batch_sids, batch_pids).clamp(1, 5)
                # Batch tensors are released when they go out of scope; emptying
                # the CUDA cache after every batch is not needed for that.
                results.append(batch_preds.cpu().numpy())
    finally:
        # Leave the model in whatever mode the caller had it in
        model.train(was_training)

    return np.concatenate(results)

# Evaluate on validation data.
# Force eval mode first: if the preceding training cell was interrupted in the
# middle of an epoch the model is still in train mode and dropout would distort
# the predictions.
model.eval()
with torch.no_grad():
    # First clear any unused memory
    torch.cuda.empty_cache()
    val_score = evaluate(valid_df, batched_pred_fn)

print(f"Validation RMSE: {val_score:.3f}")

Validation RMSE: 0.855


In [29]:
# Write the submission with the model in inference mode: eval() switches dropout
# off regardless of how the last training cell ended, no_grad() skips autograd.
model.eval()
with torch.no_grad():
    make_submission(batched_pred_fn, "deep_matrix_submission_wish_dropout_83epochs.csv")