In [1]:
# Show the interpreter that is actually running this kernel. A shell lookup
# (`which python`) resolves through the shell's PATH, which is not guaranteed
# to be the interpreter the kernel was started with.
import sys
print(sys.executable)

/home/atulpandey/Projects/venv/bin/python


In [49]:
import pandas as pd
import torch
from torch.utils.data import DataLoader, Dataset, random_split
import torch.nn as nn
import torch.nn.functional as F
from sklearn.metrics import mean_squared_error, mean_absolute_error
from math import sqrt
import os
import random
from importlib import import_module
from sklearn.model_selection import train_test_split
import importlib.util
import torch
import numpy as np

# Run on the GPU when PyTorch can see one; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("Using device:", device)


Using device: cpu


In [50]:
# --- Evaluation configuration ------------------------------------------------
DATA_PATH = "../clean_data"                # directory holding ratings/books/users CSVs
MODEL_PATH = "../model/stamp.pt"           # checkpoint with the trained weights
MODEL_SCRIPT = "../model/stamp_model.py"   # Python source that defines the STAMP class
TEST_SPLIT = 0.8                           # fraction of ratings held out for evaluation
BATCH_SIZE = 64
SEED = 42

# Seed every RNG the notebook can reach so a Restart & Run All is reproducible.
# NumPy's global generator is seeded too, alongside torch and `random`.
torch.manual_seed(SEED)
random.seed(SEED)
np.random.seed(SEED)

TOP_K = [5, 20]                            # cut-offs reported as Precision@K


In [51]:
# Resolve each CSV under DATA_PATH first, then read the three tables.
ratings_csv = os.path.join(DATA_PATH, "ratings.csv")
books_csv = os.path.join(DATA_PATH, "books.csv")
users_csv = os.path.join(DATA_PATH, "users.csv")

ratings = pd.read_csv(ratings_csv)
books = pd.read_csv(books_csv)
users = pd.read_csv(users_csv)

In [52]:
# Sanity report: one line per loaded table with its (rows, columns) shape.
summary_lines = [
    "Data loaded successfully!",
    f"Ratings shape: {ratings.shape}",
    f"Books shape: {books.shape}",
    f"Users shape: {users.shape}",
]
print("\n".join(summary_lines))

Data loaded successfully!
Ratings shape: (269616, 3)
Books shape: (271353, 8)
Users shape: (168096, 3)


In [53]:

# Shuffle every rating row with a fixed seed and renumber the index, then keep
# the tail of the shuffled frame (TEST_SPLIT of all rows) as the evaluation set.
ratings_shuffled = ratings.sample(frac=1, random_state=SEED)
ratings = ratings_shuffled.reset_index(drop=True)

n_rows = len(ratings)
split_idx = int(n_rows * (1 - TEST_SPLIT))
test_df = ratings.iloc[split_idx:].reset_index(drop=True)

print(f"Using {len(test_df)} samples for evaluation")

Using 215693 samples for evaluation


In [54]:
# Seeded split of the ratings: TEST_SPLIT of the rows go to the test frame,
# the remainder to the train frame.
split_frames = train_test_split(
    ratings,
    test_size=TEST_SPLIT,
    random_state=SEED,
)
train_df, test_df = split_frames
print(f"Train size: {len(train_df)}, Test size: {len(test_df)}")


Train size: 53923, Test size: 215693


In [55]:
import os

# Show where relative paths resolve from, and whether MODEL_PATH points at an
# existing file.
working_dir = os.getcwd()
model_abspath = os.path.abspath(MODEL_PATH)
model_found = os.path.exists(MODEL_PATH)

print("CWD:", working_dir)
print("MODEL_PATH:", model_abspath)
print("Exists:", model_found)


CWD: /home/atulpandey/Projects/Book_Recommendation_System/notebook
MODEL_PATH: /home/atulpandey/Projects/Book_Recommendation_System/model/stamp.pt
Exists: True


In [56]:
# Import the STAMP class directly from its source file.
# The path comes from the MODEL_SCRIPT config constant and is kept in its own
# variable, so MODEL_PATH (the weights checkpoint path) is not overwritten.
model_script_path = os.path.abspath(MODEL_SCRIPT)
print("Loading model from:", model_script_path)

if not os.path.exists(model_script_path):
    raise FileNotFoundError(f"Model file not found: {model_script_path}")

spec = importlib.util.spec_from_file_location("stamp_model", model_script_path)
# spec_from_file_location returns None when no loader can handle the path.
if spec is None or spec.loader is None:
    raise ImportError(f"Cannot build an import spec for: {model_script_path}")
module = importlib.util.module_from_spec(spec)
spec.loader.exec_module(module)

# Fails with AttributeError if the script does not define STAMP.
STAMP = module.STAMP


Loading model from: /home/atulpandey/Projects/Book_Recommendation_System/model/stamp_model.py


In [57]:
MODEL_WEIGHTS_PATH = os.path.abspath("../model/stamp.pt")

# Size the item vocabulary straight from the ratings table, so this cell does
# not depend on `all_items`, which is only created by a later cell.
num_items = len(ratings['book_isbn'].unique())
num_users = ratings['user_id'].nunique()

model = STAMP(num_items=num_items)

if os.path.exists(MODEL_WEIGHTS_PATH):
    # NOTE: torch.load unpickles the file; only load checkpoints you trust.
    checkpoint = torch.load(MODEL_WEIGHTS_PATH, map_location=device)

    # Accept either a wrapped checkpoint ({'model_state': ...}) or a bare state dict.
    state_dict = checkpoint['model_state'] if 'model_state' in checkpoint else checkpoint

    model_dict = model.state_dict()
    pretrained_dict = {}
    skipped_keys = []

    # Keep only tensors whose name and shape both match the freshly built model.
    for key, tensor in state_dict.items():
        if key not in model_dict:
            print(f"Unexpected key in checkpoint: {key}")
            skipped_keys.append(key)
        elif model_dict[key].shape != tensor.shape:
            print(f"Shape mismatch for '{key}': checkpoint {tensor.shape} vs model {model_dict[key].shape} — skipped")
            skipped_keys.append(key)
        else:
            pretrained_dict[key] = tensor

    # Overlay the compatible tensors on the model's current state and load it.
    model_dict.update(pretrained_dict)
    model.load_state_dict(model_dict)

    print(f"Loaded partial weights from {MODEL_WEIGHTS_PATH} (skipped {len(skipped_keys)} mismatched layers)")
    if skipped_keys:
        # Skipped tensors stay at their initial values, so say so explicitly:
        # metrics computed afterwards may not describe the trained checkpoint.
        print(f"WARNING: not loaded from checkpoint (left at initial values): {skipped_keys}")

else:
    print("Model weights file not found, using untrained model.")

model.to(device)
model.eval()
print("Model ready for evaluation ✅")


Shape mismatch for 'item_embedding.weight': checkpoint torch.Size([113992, 100]) vs model torch.Size([119943, 100]) — skipped
Loaded partial weights from /home/atulpandey/Projects/Book_Recommendation_System/model/stamp.pt (skipped 1 mismatched layers)
Model ready for evaluation ✅


In [58]:
# Assign each distinct ISBN in the ratings a contiguous integer id, following
# the order in which unique() returns them.
all_items = ratings['book_isbn'].unique()
isbn_to_idx = dict(zip(all_items, range(len(all_items))))

# Encode the whole vocabulary and move it to the evaluation device.
all_items_encoded = [isbn_to_idx[isbn] for isbn in all_items]
all_items_tensor = torch.tensor(all_items_encoded, dtype=torch.long).to(device)

In [59]:
def _model_device(model):
    """Return the device of the model's first parameter, or CPU if it has none."""
    parameters = getattr(model, "parameters", None)
    if callable(parameters):
        first_param = next(iter(parameters()), None)
        if first_param is not None:
            return first_param.device
    return torch.device("cpu")


def precision_at_k(model, test_df, user_pos_items, isbn_to_idx, k_values=(5, 20)):
    """Compute mean Precision@K over the users in ``test_df``.

    For each user, the items in ``user_pos_items[user_id]`` form the input
    session, every distinct item of ``test_df`` is a candidate, and the user's
    own ``test_df`` rows are the relevant items. Users without any known
    session item are skipped.

    Args:
        model: Called as ``model(session_tensor, candidates_tensor)`` with long
            tensors of shape (1, session_len) and (1, n_candidates). The output
            is flattened and treated as one score per candidate.
        test_df: DataFrame with ``user_id`` and ``book_isbn`` columns.
        user_pos_items: Mapping from user_id to an iterable of ISBNs.
        isbn_to_idx: Mapping from ISBN to integer item id; unknown ISBNs are ignored.
        k_values: Iterable of cut-offs K.

    Returns:
        Dict mapping each K to the mean of hits/K over evaluated users, or 0.0
        when no user could be evaluated.
    """
    model.eval()
    k_values = list(k_values)
    max_k = max(k_values, default=0)
    # Build tensors on the device the model lives on instead of relying on a
    # notebook-level `device` global.
    target_device = _model_device(model)

    candidates = [isbn_to_idx[i] for i in test_df['book_isbn'].unique() if i in isbn_to_idx]
    if not candidates:
        # Nothing can be recommended, so every precision is zero.
        return {k: 0.0 for k in k_values}
    candidates_tensor = torch.tensor(
        candidates, dtype=torch.long, device=target_device
    ).unsqueeze(0)

    results = {k: [] for k in k_values}
    with torch.no_grad():
        for user_id, group in test_df.groupby("user_id"):
            session_items = [
                isbn_to_idx[i] for i in user_pos_items.get(user_id, []) if i in isbn_to_idx
            ]
            if not session_items:
                continue
            session_tensor = torch.tensor(
                [session_items], dtype=torch.long, device=target_device
            )
            # reshape(-1) keeps a 1-D score vector even with a single candidate.
            scores = model(session_tensor, candidates_tensor).reshape(-1).cpu().numpy()
            # Only the best max_k candidates can ever count as a hit.
            ranked = np.argsort(scores)[::-1][:max_k]
            top_items = [candidates[i] for i in ranked]
            actual_items = {isbn_to_idx[i] for i in group['book_isbn'] if i in isbn_to_idx}
            for k in k_values:
                hits = len(set(top_items[:k]) & actual_items)
                results[k].append(hits / k)
    return {k: (np.mean(v) if v else 0.0) for k, v in results.items()}


In [None]:
# Per-user interaction history, used by precision_at_k as the input session.
# NOTE(review): `user_pos_items` is not defined by any visible cell. It is
# built here from train_df on the assumption that the training-split
# interactions are the intended history — confirm.
user_pos_items = train_df.groupby("user_id")["book_isbn"].apply(list).to_dict()

results = precision_at_k(model, test_df, user_pos_items, isbn_to_idx, k_values=TOP_K)
for k, val in results.items():
    print(f"Precision@{k}: {val:.4f}")
