In [1]:
!pip install torch torchvision torchaudio -q
!pip install torch-geometric -q
!pip install dgl -q  # generic DGL (CPU/GPU autodetect)
!pip install torchmetrics==1.4.0.post0 scikit-learn pandas numpy tqdm geopy haversine -q

In [2]:
# ============================================================
# Cell 1: Imports
# ============================================================
import pandas as pd
import numpy as np
import ast
from collections import defaultdict

import torch
from torch import nn
import torch.nn.functional as F

from tqdm import tqdm
from scipy.sparse import coo_matrix
from scipy.sparse.linalg import svds
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.metrics import pairwise_distances

Data Loading

In [3]:
import json

### Metadata

In [4]:
metadata_path = "/content/drive/MyDrive/cs224w_finalproject-main/meta-District_of_Columbia.json"
review_path = "/content/drive/MyDrive/cs224w_finalproject-main/review-District_of_Columbia_10.json"

In [5]:
keywords = {
    "restaurant",
    "cafe",
    "bar",
    "beer",
    "pub",
    "brewery",
    "winery",
    "distillery",
    "brewpub",
    "brewery",
    "brewpub",
    "deli",
    "sandwich",
    "coffee",
    "tea",
    "juice",
    "smoothie"
}

In [6]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [7]:
# Load JSONL file (one JSON object per line)
metadata = []
with open(metadata_path, "r") as f:
    for line in f:
        line = line.strip()
        if line:  # Skip empty lines
            metadata.append(json.loads(line))

print(f"Loaded {len(metadata)} records")
print(f"Type: {type(metadata)}")
if metadata:
    print(f"First record keys: {list(metadata[0].keys())}")

Loaded 11060 records
Type: <class 'list'>
First record keys: ['name', 'address', 'gmap_id', 'description', 'latitude', 'longitude', 'category', 'avg_rating', 'num_of_reviews', 'price', 'hours', 'MISC', 'state', 'relative_results', 'url']


In [8]:
metadata[1]

{'name': "Joseph's Barbershop",
 'address': "Joseph's Barbershop, 2624B Georgia Ave NW, Washington, DC 20001",
 'gmap_id': '0x89b7b797548dfcfd:0xe3a4b60261c60313',
 'description': None,
 'latitude': 38.9249134,
 'longitude': -77.0228857,
 'category': ['Barber shop'],
 'avg_rating': 4.3,
 'num_of_reviews': 8,
 'price': None,
 'hours': [['Thursday', '10AM–7PM'],
  ['Friday', '10AM–7PM'],
  ['Saturday', '10AM–7PM'],
  ['Sunday', 'Closed'],
  ['Monday', '10AM–7PM'],
  ['Tuesday', '10AM–7PM'],
  ['Wednesday', '10AM–7PM']],
 'MISC': None,
 'state': 'Permanently closed',
 'relative_results': ['0x89b7c81aec442c2f:0x2df3cce722072454',
  '0x89b7c86180bf7b99:0xf9c20a51d787e071'],
 'url': 'https://www.google.com/maps/place//data=!4m2!3m1!1s0x89b7b797548dfcfd:0xe3a4b60261c60313?authuser=-1&hl=en&gl=us'}

In [9]:
# Filter metadata to only include entries with at least one keyword in categories
filtered_metadata = []

for entry in metadata:
    categories = entry.get('category', [])
    if categories is None:
        continue

    # Check if any keyword appears in any category string (case-insensitive)
    matches = False
    for category in categories:
        if category is None:
            continue
        category_lower = str(category).lower()
        for keyword in keywords:
            if keyword.lower() in category_lower:
                matches = True
                break
        if matches:
            break

    if matches:
        filtered_metadata.append(entry)

print(f"Original metadata: {len(metadata)} records")
print(f"Filtered metadata: {len(filtered_metadata)} records")
print(f"Filtered {len(metadata) - len(filtered_metadata)} records ({100 * (len(metadata) - len(filtered_metadata)) / len(metadata):.1f}%)")

Original metadata: 11060 records
Filtered metadata: 3719 records
Filtered 7341 records (66.4%)


In [10]:
# Pre-process filtered_metadata: create set of gmap_ids and mapping to metadata entries
filtered_gmap_ids = set()
gmap_id_to_metadata = {}

for entry in filtered_metadata:
    gmap_id = entry.get('gmap_id')
    if gmap_id:
        filtered_gmap_ids.add(gmap_id)
        gmap_id_to_metadata[gmap_id] = entry

print(f"Created set of {len(filtered_gmap_ids)} unique gmap_ids from filtered_metadata")

Created set of 3707 unique gmap_ids from filtered_metadata


In [11]:
# Load reviews from JSONL file
reviews = []
with open(review_path, "r") as f:
    for line in tqdm(f, desc="Loading reviews"):
        line = line.strip()
        if line:  # Skip empty lines
            reviews.append(json.loads(line))

print(f"Loaded {len(reviews)} reviews")
if reviews:
    print(f"First review keys: {list(reviews[0].keys())}")

Loading reviews: 564783it [00:03, 171743.68it/s]

Loaded 564783 reviews
First review keys: ['user_id', 'name', 'time', 'rating', 'text', 'pics', 'resp', 'gmap_id']





In [12]:
# Filter reviews and merge metadata attributes
filtered_reviews = []

for review in tqdm(reviews, desc="Filtering and merging reviews"):
    gmap_id = review.get('gmap_id')

    # Only keep reviews with gmap_id in filtered_metadata
    if gmap_id and gmap_id in filtered_gmap_ids:
        # Create a copy of the review to avoid modifying the original
        merged_review = review.copy()

        # Get the corresponding metadata entry
        metadata_entry = gmap_id_to_metadata[gmap_id]

        # Append all attributes from metadata to the review
        # Use update to merge, which will overwrite if keys conflict (review takes precedence)
        for key, value in metadata_entry.items():
            if key not in merged_review:  # Only add if not already in review
                merged_review[key] = value
            else:
                # If key exists, you might want to prefix it or handle differently
                # For now, we'll keep the review's original value
                pass

        filtered_reviews.append(merged_review)

print(f"Original reviews: {len(reviews)}")
print(f"Filtered reviews: {len(filtered_reviews)}")
print(f"Filtered {len(reviews) - len(filtered_reviews)} reviews ({100 * (len(reviews) - len(filtered_reviews)) / len(reviews):.1f}%)")
if filtered_reviews:
    print(f"Sample filtered review keys: {list(filtered_reviews[0].keys())}")

Filtering and merging reviews: 100%|██████████| 564783/564783 [00:02<00:00, 274902.08it/s]

Original reviews: 564783
Filtered reviews: 347984
Filtered 216799 reviews (38.4%)
Sample filtered review keys: ['user_id', 'name', 'time', 'rating', 'text', 'pics', 'resp', 'gmap_id', 'address', 'description', 'latitude', 'longitude', 'category', 'avg_rating', 'num_of_reviews', 'price', 'hours', 'MISC', 'state', 'relative_results', 'url']





In [13]:
filtered_reviews[1]

{'user_id': '108642560086289718425',
 'name': 'Keshava Mysore',
 'time': 1510920735052,
 'rating': 4,
 'text': 'High prices for not so big portions! But I think it is the only decent food available on the National Mall area without walking a few miles away from there.',
 'pics': None,
 'resp': None,
 'gmap_id': '0x89b7b7851b06ef6b:0x5f356b1eb1da27',
 'address': 'Cascade Café, 599 Constitution Ave. NW, Washington, DC 20565',
 'description': 'Cafeteria-style dining at the National Gallery of Art, with casual fare such as pizza & sandwiches.',
 'latitude': 38.8920767,
 'longitude': -77.0199082,
 'category': ['American restaurant'],
 'avg_rating': 2.6,
 'num_of_reviews': 28,
 'price': None,
 'hours': [['Thursday', '11AM–3PM'],
  ['Friday', '11AM–3PM'],
  ['Saturday', '11AM–3PM'],
  ['Sunday', '11AM–4PM'],
  ['Monday', '11AM–3PM'],
  ['Tuesday', '11AM–3PM'],
  ['Wednesday', '11AM–3PM']],
 'MISC': {'Service options': ['Takeout', 'Dine-in', 'Delivery'],
  'Highlights': ['Great coffee'],
  'Po

### Train-Val-Test Split



In [14]:
# Sort filtered_reviews by time to avoid data leakage
filtered_reviews_sorted = sorted(filtered_reviews, key=lambda x: x.get('time', 0))

# Add review_id field in ascending order
for idx, review in enumerate(filtered_reviews_sorted):
    review['review_id'] = idx

# Extract text field into a dictionary from review_id to review text
review_id_to_text = {}
for review in filtered_reviews_sorted:
    review_id = review['review_id']
    text = review.get('text')
    review_id_to_text[review_id] = text  # Will be None if text is None

print(f"Sorted {len(filtered_reviews_sorted)} reviews by time")
if filtered_reviews_sorted:
    print(f"Earliest time: {filtered_reviews_sorted[0].get('time')}")
    print(f"Latest time: {filtered_reviews_sorted[-1].get('time')}")

# Split into 80:10:10 train:val:test
total = len(filtered_reviews_sorted)
train_size = int(0.8 * total)
val_size = int(0.1 * total)
test_size = total - train_size - val_size  # Remaining goes to test

train_reviews = filtered_reviews_sorted[:train_size]
val_reviews = filtered_reviews_sorted[train_size:train_size + val_size]
test_reviews = filtered_reviews_sorted[train_size + val_size:]

print(f"\nSplit results:")
print(f"Train: {len(train_reviews)} reviews ({100 * len(train_reviews) / total:.1f}%)")
print(f"Val: {len(val_reviews)} reviews ({100 * len(val_reviews) / total:.1f}%)")
print(f"Test: {len(test_reviews)} reviews ({100 * len(test_reviews) / total:.1f}%)")

Sorted 347984 reviews by time
Earliest time: 662601600000
Latest time: 1630966701405

Split results:
Train: 278387 reviews (80.0%)
Val: 34798 reviews (10.0%)
Test: 34799 reviews (10.0%)


In [15]:
train_reviews[-1]

{'user_id': '114719101114449289199',
 'name': 'Alejandro Suarez',
 'time': 1571933437638,
 'rating': 5,
 'text': None,
 'pics': None,
 'resp': None,
 'gmap_id': '0x89b7b82e9ef154c3:0xa310bf3ea7d34a66',
 'address': 'Capitol Lounge, 229 Pennsylvania Ave. SE, Washington, DC 20003',
 'description': 'Neighborhood fixture drawing lots of Hill staffers with a lengthy beer list & happy-hour specials.',
 'latitude': 38.8869652,
 'longitude': -77.0024373,
 'category': ['Bar',
  'American restaurant',
  'Beer hall',
  'Event venue',
  'Pool hall',
  'Sports bar'],
 'avg_rating': 4.2,
 'num_of_reviews': 458,
 'price': '$',
 'hours': [['Sunday', '10AM–2AM'],
  ['Monday', '4PM–2AM'],
  ['Tuesday', '4PM–2AM'],
  ['Wednesday', '4PM–2AM'],
  ['Thursday', '11AM–2AM'],
  ['Friday', '11AM–2AM'],
  ['Saturday', '10AM–3AM']],
 'MISC': {'Service options': ['Delivery', 'Takeout', 'Dine-in'],
  'Accessibility': ['Wheelchair accessible entrance',
   'Wheelchair accessible seating'],
  'Offerings': ['Alcohol',
 

In [22]:
import json
import pandas as pd

train_df = pd.json_normalize(train_reviews)
train_df.to_csv("train_reviews.csv", index=False, escapechar="\\")

In [23]:
val_df = pd.json_normalize(val_reviews)

val = val_df.copy()
val = val.applymap(
    lambda x: json.dumps(x) if isinstance(x, (list, dict)) else x
)

val.to_csv("val_reviews.csv", index=False, escapechar="\\")

  val = val.applymap(


In [24]:
test_df = pd.json_normalize(test_reviews)

test = test_df.copy()
test = test.applymap(
    lambda x: json.dumps(x) if isinstance(x, (list, dict)) else x
)

test.to_csv("test_reviews.csv", index=False, escapechar="\\")

  test = test.applymap(


## Text Embeddings

In [16]:
!pip install sentence-transformers



In [17]:
model_name = "sentence-transformers/all-MiniLM-L6-v2"  # 384-dim

In [18]:
# Setup Sentence-BERT embeddings for each review
from sentence_transformers import SentenceTransformer

# Determine device for SentenceTransformer (CUDA > MPS > CPU)
if torch.cuda.is_available():
    embedding_device = 'cuda'
elif torch.backends.mps.is_available() and torch.backends.mps.is_built():
    embedding_device = 'mps'
else:
    embedding_device = 'cpu'

print(f"Using device: {embedding_device} for embeddings")

# Load the model on the specified device
model = SentenceTransformer(model_name, device=embedding_device)

# Collect review_ids and texts (only for reviews with non-None text)
review_ids_with_text = []
review_texts = []

for review_id, text in review_id_to_text.items():
    if text is not None:
        review_ids_with_text.append(review_id)
        review_texts.append(text)

print(f"Generating embeddings for {len(review_texts)} reviews with text (out of {len(filtered_reviews_sorted)} total)")

# Generate embeddings
embeddings = model.encode(
    review_texts,
    batch_size=64,
    show_progress_bar=True,
    convert_to_numpy=True,
    normalize_embeddings=True,  # L2-normalize (often helpful)
)

# Create dictionary mapping review_id to embeddings (only for reviews with text)
review_id_to_embedding = {}
for idx, review_id in enumerate(review_ids_with_text):
    review_id_to_embedding[review_id] = embeddings[idx]

print(f"Created embeddings dictionary with {len(review_id_to_embedding)} entries")
print(f"Embedding shape: {embeddings.shape}")  # (num_reviews_with_text, 384) for MiniLM

Using device: cuda for embeddings


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Generating embeddings for 184355 reviews with text (out of 347984 total)


Batches:   0%|          | 0/2881 [00:00<?, ?it/s]

Created embeddings dictionary with 184355 entries
Embedding shape: (184355, 384)


In [19]:
review_id_to_embedding[0]

array([ 5.02150767e-02,  3.72288488e-02,  7.00677186e-02,  1.52061200e-02,
       -6.65047690e-02,  4.19559255e-02, -3.64256203e-02, -5.37063852e-02,
        4.36112732e-02, -8.46167728e-02, -3.68720256e-02,  1.75607055e-02,
       -5.57896867e-02,  7.06654862e-02,  3.23818587e-02, -1.47804935e-02,
        6.11910895e-02, -5.82540408e-03,  4.92100380e-02, -4.15658355e-02,
       -4.15173247e-02, -1.23551395e-02,  5.40842786e-02,  3.94111648e-02,
       -4.63462695e-02, -2.22464669e-02,  3.22106183e-02,  1.76971615e-03,
        3.03513035e-02, -4.75469530e-02,  2.23724805e-02,  8.75919126e-03,
       -9.65329073e-03, -4.36648168e-02,  1.39976479e-03,  5.05931862e-02,
       -8.76030698e-03, -1.98114589e-02,  7.02279583e-02,  2.07680208e-03,
        2.04424486e-02,  3.49093303e-02,  1.23715969e-02,  4.21532318e-02,
       -4.87891026e-02, -4.00152653e-02,  3.52140404e-02, -2.93613714e-03,
        2.81842649e-02,  4.46706749e-02,  2.07509147e-03,  1.31809032e-02,
       -7.34896341e-04, -

In [20]:
# Add embeddings as a feature to reviews
# This ensures embeddings are included when dataframes are created

def add_embeddings_to_reviews(review_list, embedding_dict):
    """Add embedding feature to each review in the list."""
    for review in review_list:
        review_id = review.get('review_id')
        if review_id is not None:
            # Add embedding if it exists (reviews without text won't have embeddings)
            if review_id in embedding_dict:
                review['embedding'] = embedding_dict[review_id].tolist()  # Convert numpy array to list for JSON serialization
            else:
                review['embedding'] = None  # None for reviews without text
    return review_list

# Add embeddings to train, val, and test reviews
print("Adding embeddings to train reviews...")
train_reviews = add_embeddings_to_reviews(train_reviews, review_id_to_embedding)

print("Adding embeddings to val reviews...")
val_reviews = add_embeddings_to_reviews(val_reviews, review_id_to_embedding)

print("Adding embeddings to test reviews...")
test_reviews = add_embeddings_to_reviews(test_reviews, review_id_to_embedding)

# Count how many reviews have embeddings
train_with_emb = sum(1 for r in train_reviews if r.get('embedding') is not None)
val_with_emb = sum(1 for r in val_reviews if r.get('embedding') is not None)
test_with_emb = sum(1 for r in test_reviews if r.get('embedding') is not None)

print(f"\nEmbeddings added:")
print(f"  Train: {train_with_emb}/{len(train_reviews)} reviews have embeddings")
print(f"  Val: {val_with_emb}/{len(val_reviews)} reviews have embeddings")
print(f"  Test: {test_with_emb}/{len(test_reviews)} reviews have embeddings")


Adding embeddings to train reviews...
Adding embeddings to val reviews...
Adding embeddings to test reviews...

Embeddings added:
  Train: 148928/278387 reviews have embeddings
  Val: 16162/34798 reviews have embeddings
  Test: 19265/34799 reviews have embeddings


In [21]:
train_df = pd.json_normalize(train_reviews)

In [22]:
val_df = pd.json_normalize(val_reviews)

In [23]:
test_df = pd.json_normalize(test_reviews)

Feature Building

In [46]:
# ============================================================
# Normalize 'price' column into integer price levels for HGT
# ============================================================

def normalize_price(x):
    # Handle NaN
    if pd.isna(x):
        return 0  # unknown price

    # If it's a number (float or int)
    if isinstance(x, (int, float)):
        return int(x)

    # If it's a string: attempt numeric conversion
    if isinstance(x, str):
        x = x.strip()

        # Like "$$", "$$$", etc. → map to length
        if all(c == '$' for c in x):
            return len(x)

        # If string represents a number, convert
        if x.isdigit():
            return int(x)

        # Otherwise fallback
        return 0

    # Anything else
    return 0

# Apply to train, val, test (to keep consistency)
for df in [train_df, val_df, test_df]:
    df["price"] = df["price"].apply(normalize_price)

In [47]:
# ============================================================
# Cell 3: Build global ID mappings (SAFE) and add u/i columns
# ============================================================
all_users = pd.concat(
    [train_df["user_id"], val_df["user_id"], test_df["user_id"]],
    ignore_index=True
)
all_items = pd.concat(
    [train_df["gmap_id"], val_df["gmap_id"], test_df["gmap_id"]],
    ignore_index=True
)

user2idx = {u: idx for idx, u in enumerate(all_users.unique())}
item2idx = {i: idx for idx, i in enumerate(all_items.unique())}

num_users = len(user2idx)
num_items = len(item2idx)
print("num_users:", num_users, "num_items:", num_items)

for df in [train_df, val_df, test_df]:
    df["u"] = df["user_id"].map(user2idx)
    df["i"] = df["gmap_id"].map(item2idx)

# Train-only view for all feature construction (avoid leakage)
full_df_train = train_df.copy()

num_users: 25840 num_items: 2922


In [48]:
# ============================================================
# Cell 4: Hardened, Normalized Train-only Feature Builder
# ============================================================

import numpy as np
import pandas as pd

# ------------------------------------------------------------
# 4.1 Ensure category is always a list
# ------------------------------------------------------------
for df in [train_df, val_df, test_df]:
    df["category"] = df["category"].apply(lambda x: x if isinstance(x, list) else [])

# TRAIN-ONLY METADATA
df_train = full_df_train.copy()


# ============================================================
# 4.2 GEO FEATURES (latitude, longitude)
# ============================================================
df_train["latitude"]  = pd.to_numeric(df_train["latitude"], errors="coerce")
df_train["longitude"] = pd.to_numeric(df_train["longitude"], errors="coerce")

item_geo = (
    df_train.groupby("i")[["latitude", "longitude"]]
    .mean()                           # average per item
    .reindex(range(num_items))        # ensure full coverage
    .fillna(0.0)
)

# Normalize geo features (z-score)
item_geo = (item_geo - item_geo.mean()) / (item_geo.std() + 1e-6)
item_geo = item_geo.fillna(0.0).astype("float32")


# ============================================================
# 4.3 CATEGORY MULTI-HOT
# ============================================================
from sklearn.preprocessing import MultiLabelBinarizer

cat_lists = df_train.groupby("i")["category"].apply(
    lambda x: x.iloc[0] if isinstance(x.iloc[0], list) else []
)

mlb = MultiLabelBinarizer()
cat_matrix = mlb.fit_transform(cat_lists)

item_cat = (
    pd.DataFrame(cat_matrix, columns=[f"cat_{c}" for c in mlb.classes_], index=cat_lists.index)
    .reindex(range(num_items), fill_value=0)
    .astype("float32")
)

# categories are already 0/1; normalization optional but safe:
# (optional) item_cat  = (item_cat - item_cat.mean()) / (item_cat.std() + 1e-6)


# ============================================================
# 4.4 NUMERIC STATS (avg_rating, num_of_reviews)
# ============================================================

stats = (
    df_train.groupby("i")[["avg_rating", "num_of_reviews"]]
    .first()
    .reindex(range(num_items))
)

stats["avg_rating"]     = pd.to_numeric(stats["avg_rating"], errors="coerce").fillna(0)
stats["num_of_reviews"] = pd.to_numeric(stats["num_of_reviews"], errors="coerce").fillna(0)

# Normalize ratings: center around 3, scale by 2 → range ≈ [-1, +1]
stats["avg_rating"] = (stats["avg_rating"] - 3.0) / 2.0

# Normalize review counts: log transform for stability
stats["num_of_reviews"] = np.log1p(stats["num_of_reviews"])

# z-score normalize stats
stats = (stats - stats.mean()) / (stats.std() + 1e-6)
item_stats = stats.fillna(0).astype("float32")


# ============================================================
# 4.5 MISC TAG FEATURES
# ============================================================
misc_cols = [c for c in df_train.columns if c.startswith("MISC")]

# Extract vocabulary
misc_labels = set()
for col in misc_cols:
    for entry in df_train[col]:
        if isinstance(entry, list):
            misc_labels.update(entry)

misc_labels = sorted(misc_labels)
misc_label2idx = {tag: idx for idx, tag in enumerate(misc_labels)}

# Initialize (num_items, num_misc_labels)
item_misc_np = np.zeros((num_items, len(misc_labels)), dtype="float32")

# Fill MISC tags
for idx, row in df_train.iterrows():
    i = row["i"]
    if not (0 <= i < num_items):
        continue
    for col in misc_cols:
        tags = row[col]
        if isinstance(tags, list):
            for tag in tags:
                j = misc_label2idx.get(tag)
                if j is not None:
                    item_misc_np[i, j] = 1.0

item_misc = pd.DataFrame(item_misc_np, columns=[f"misc_{m}" for m in misc_labels])

# z-score normalize MISC features
# (optional: many models skip normalization for binary one-hot)
if item_misc.values.sum() > 0:
    item_misc = (item_misc - item_misc.mean()) / (item_misc.std() + 1e-6)
item_misc = item_misc.fillna(0).astype("float32")


# ============================================================
# 4.6 Combine ALL item features safely
# ============================================================
item_features_np = np.hstack([
    item_geo.values.astype("float32"),
    item_cat.values.astype("float32"),
    item_stats.values.astype("float32"),
    item_misc.values.astype("float32"),
])

# Final global normalization for stability
item_features_np = (item_features_np - item_features_np.mean(axis=0)) / \
                   (item_features_np.std(axis=0) + 1e-6)

item_features_np = np.nan_to_num(item_features_np, nan=0.0, posinf=0.0, neginf=0.0)

item_features = torch.tensor(item_features_np, dtype=torch.float32)
print("Final item_features shape:", item_features.shape)


Final item_features shape: torch.Size([2922, 538])


In [49]:
# ============================================================
# Aggregate review embeddings to item embeddings
# ============================================================

embedding_dim = embeddings.shape[1] if embeddings.size > 0 else 384 # Default if no embeddings

# Initialize a list of lists to store embeddings for each item
item_embeddings_list_of_lists = [[] for _ in range(num_items)]

# Populate the list with review embeddings for each item
for review_data in filtered_reviews_sorted:
    gmap_id = review_data.get('gmap_id')
    embedding = review_data.get('embedding')

    if gmap_id and embedding is not None and gmap_id in item2idx:
        item_idx = item2idx[gmap_id]
        item_embeddings_list_of_lists[item_idx].append(torch.tensor(embedding, dtype=torch.float32))

# Calculate the average embedding for each item
averaged_item_embeddings = []
for embeddings_for_item in item_embeddings_list_of_lists:
    if embeddings_for_item:
        # Stack and average the embeddings for the current item
        avg_embedding = torch.stack(embeddings_for_item).mean(dim=0)
        averaged_item_embeddings.append(avg_embedding)
    else:
        # If no embeddings, append a zero vector
        averaged_item_embeddings.append(torch.zeros(embedding_dim, dtype=torch.float32))

# Convert to a single tensor
item_embedding_features = torch.stack(averaged_item_embeddings)

print("Original item_features shape:", item_features.shape)
print("New item_embedding_features shape:", item_embedding_features.shape)

# Concatenate with existing item_features
item_features = torch.cat([item_features, item_embedding_features], dim=1)

# Renormalize the combined item_features for stability
# Move to CPU for numpy operations for now, then back to device
item_features_np = item_features.cpu().numpy()
item_features_np = (item_features_np - item_features_np.mean(axis=0)) / \
                   (item_features_np.std(axis=0) + 1e-6)
item_features_np = np.nan_to_num(item_features_np, nan=0.0, posinf=0.0, neginf=0.0)
item_features = torch.tensor(item_features_np, dtype=torch.float32)

print("Combined and re-normalized item_features shape:", item_features.shape)


Original item_features shape: torch.Size([2922, 538])
New item_embedding_features shape: torch.Size([2922, 384])
Combined and re-normalized item_features shape: torch.Size([2922, 922])


In [50]:
# ============================================================
# Cell 5: Train-only user features (home coord, count, avg rating) and normalize
# ============================================================

user_latlon = (
    df_train.groupby("u")[["latitude", "longitude"]]
    .mean()
    .reindex(range(num_users))
    .fillna(0.0)
)

# normalize latitude/longitude
user_latlon = (user_latlon - user_latlon.mean()) / (user_latlon.std() + 1e-6)
user_latlon = user_latlon.fillna(0)

user_review_counts = (
    df_train.groupby("u")["rating"]
    .count()
    .reindex(range(num_users))
    .fillna(0)
)

# log-normalize review counts
user_review_counts = np.log1p(user_review_counts)

# z-score normalize
user_review_counts = (user_review_counts - user_review_counts.mean()) / (user_review_counts.std() + 1e-6)

user_avg_rating = (
    df_train.groupby("u")["rating"]
    .mean()
    .reindex(range(num_users))
    .fillna(3.0)  # neutral rating
)

# normalize: center at 3, divide by 2
user_avg_rating = (user_avg_rating - 3.0) / 2.0

# z-score
user_avg_rating = (user_avg_rating - user_avg_rating.mean()) / (user_avg_rating.std() + 1e-6)

# combine
user_features_np = np.vstack([
    user_latlon["latitude"].values,
    user_latlon["longitude"].values,
    user_review_counts.values,
    user_avg_rating.values
]).T.astype("float32")

user_features_np = np.nan_to_num(user_features_np, nan=0.0, posinf=0.0, neginf=0.0)

user_features = torch.tensor(user_features_np, dtype=torch.float32)
print("Final user_features shape:", user_features.shape)

Final user_features shape: torch.Size([25840, 4])


Diagonstics

In [53]:
print("Any NaNs in item_features:", torch.isnan(item_features).any().item())
print("Any Infs in item_features:", torch.isinf(item_features).any().item())
print("Item feature max/min:", item_features.max(), item_features.min())

print("\nAny NaNs in user_features:", torch.isnan(user_features).any().item())
print("Any Infs in user_features:", torch.isinf(user_features).any().item())
print("User feature max/min:", user_features.max(), user_features.min())

Any NaNs in item_features: False
Any Infs in item_features: False
Item feature max/min: tensor(54.0488) tensor(-8.4582)

Any NaNs in user_features: False
Any Infs in user_features: False
User feature max/min: tensor(4.7321) tensor(-4.7368)


In [52]:
print("GEO NaNs:",    np.isnan(item_geo.values).any())
print("CAT NaNs:",    np.isnan(item_cat.values).any())
print("STATS NaNs:",  np.isnan(item_stats.values).any())
print("MISC NaNs:",   np.isnan(item_misc.values).any())

GEO NaNs: False
CAT NaNs: False
STATS NaNs: False
MISC NaNs: False


In [37]:
print("GEO shape:", item_geo.shape)
print("CAT shape:", item_cat.shape)
print("STATS shape:", item_stats.shape)
print("MISC shape:", item_misc.shape)
print("Expected num_items:", num_items)

GEO shape: (2922, 2)
CAT shape: (2922, 415)
STATS shape: (2922, 2)
MISC shape: (2922, 119)
Expected num_items: 2922


In [38]:
bad_item_mask = torch.isnan(item_features).any(dim=1) | torch.isinf(item_features).any(dim=1)
bad_items = bad_item_mask.nonzero(as_tuple=True)[0]

print("Bad item count:", len(bad_items))
print("Bad item indices:", bad_items[:20])

Bad item count: 0
Bad item indices: tensor([], dtype=torch.int64)


Baseline Model: LightGCN

In [54]:
# ============================================================
# Cell 6: Train graph (user–item bipartite, symmetric) & helpers
# ============================================================
# User nodes: 0..num_users-1
# Item nodes: num_users..num_users+num_items-1

u_train = torch.tensor(train_df["u"].values, dtype=torch.long)
i_train = torch.tensor(train_df["i"].values + num_users, dtype=torch.long)

# make edges bidirectional
edge_index = torch.stack([
    torch.cat([u_train, i_train]),
    torch.cat([i_train, u_train]),
], dim=0)

num_nodes = num_users + num_items
print("edge_index shape:", edge_index.shape, "num_nodes:", num_nodes)

# Positive items per user for TRAIN (for negative sampling)
user_pos_train = full_df_train.groupby("u")["i"].apply(set).to_dict()

def sample_negative(u: int, k: int = 1):
    """Random negative items for user u, using TRAIN positives only."""
    positives = user_pos_train.get(int(u), set())
    negatives = []
    while len(negatives) < k:
        j = np.random.randint(0, num_items)
        if j not in positives:
            negatives.append(j)
    return negatives


edge_index shape: torch.Size([2, 556774]) num_nodes: 28762


In [55]:
# ============================================================
# DEVICE SETUP
# ============================================================
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

# Move data tensors to correct device
edge_index = edge_index.to(device)
item_features = item_features.to(device)
user_features = user_features.to(device)

Using device: cuda


In [56]:
# ============================================================
# LightGCN (Device-Safe, Feature-Safe)
# ============================================================

class LightGCN(nn.Module):
    def __init__(self, num_users, num_items, user_feat_dim, item_feat_dim,
                 dim=64, n_layers=3):
        super().__init__()
        self.num_users = num_users
        self.num_items = num_items
        self.dim = dim
        self.n_layers = n_layers

        # Learnable embeddings
        self.user_emb = nn.Embedding(num_users, dim)
        self.item_emb = nn.Embedding(num_items, dim)

        # Feature projections (all trained)
        self.user_feat_proj = nn.Linear(user_feat_dim, dim, bias=False)
        self.item_feat_proj = nn.Linear(item_feat_dim, dim, bias=False)

        nn.init.xavier_uniform_(self.user_emb.weight)
        nn.init.xavier_uniform_(self.item_emb.weight)

    def propagate(self, x, edge_index):
        row, col = edge_index
        deg = torch.bincount(row, minlength=x.size(0)).float()
        deg_inv_sqrt = deg.pow(-0.5)
        deg_inv_sqrt[deg_inv_sqrt == float("inf")] = 0

        norm = deg_inv_sqrt[row] * deg_inv_sqrt[col]
        msg = x[col] * norm.unsqueeze(1)

        out = torch.zeros_like(x)
        out.index_add_(0, row, msg)
        return out

    def forward(self, edge_index):
        # Move embeddings + features to correct device
        user_x = self.user_emb.weight + self.user_feat_proj(user_features)
        item_x = self.item_emb.weight + self.item_feat_proj(item_features)

        x = torch.cat([user_x, item_x], dim=0)
        all_layers = [x]

        for _ in range(self.n_layers):
            x = self.propagate(x, edge_index)
            all_layers.append(x)

        x_final = torch.stack(all_layers, dim=1).mean(dim=1)

        user_gcn = x_final[:self.num_users]
        item_gcn = x_final[self.num_users:]
        return user_gcn, item_gcn

    def predict(self, u, i, user_gcn, item_gcn):
        return (user_gcn[u] * item_gcn[i]).sum(dim=-1)

In [57]:
# ============================================================
# Cell 8: BPR loss & training utilities (with tqdm)
# ============================================================
from tqdm.auto import tqdm
import wandb

def bpr_loss(model, edge_index, batch_users, batch_pos_items):
    user_gcn, item_gcn = model(edge_index)

    neg_items = torch.tensor(
        [sample_negative(int(u))[0] for u in batch_users],
        dtype=torch.long,
        device=device,
    )

    pos_scores = model.predict(batch_users, batch_pos_items, user_gcn, item_gcn)
    neg_scores = model.predict(batch_users, neg_items, user_gcn, item_gcn)

    return -torch.log(torch.sigmoid(pos_scores - neg_scores)).mean()

def train_bpr_model(model, edge_index, train_df, epochs=10, batch_size=1024, lr=1e-3):
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    train_users = torch.tensor(train_df["u"].values, dtype=torch.long, device=device)
    train_items = torch.tensor(train_df["i"].values, dtype=torch.long, device=device)

    epoch_losses, batch_losses = [], []

    for epoch in range(epochs):
        perm = torch.randperm(len(train_users), device=device)
        epoch_loss = 0.0

        pbar = tqdm(perm.split(batch_size), desc=f"LightGCN Epoch {epoch+1}/{epochs}")

        for step, idx in enumerate(pbar):
            batch_u = train_users[idx]
            batch_i = train_items[idx]

            loss = bpr_loss(model, edge_index, batch_u, batch_i)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            bl = loss.item()
            batch_losses.append(bl)
            epoch_loss += bl

            pbar.set_postfix({"batch_loss": f"{bl:.4f}", "epoch_loss": f"{epoch_loss:.4f}"})

        epoch_losses.append(epoch_loss)
        print(f"[LightGCN] Epoch {epoch+1}: total_loss={epoch_loss:.4f}")

    return model, epoch_losses, batch_losses


In [58]:
import wandb
wandb.login()

  | |_| | '_ \/ _` / _` |  _/ -_)
[34m[1mwandb[0m: (1) Create a W&B account
[34m[1mwandb[0m: (2) Use an existing W&B account
[34m[1mwandb[0m: (3) Don't visualize my results
[34m[1mwandb[0m: Enter your choice:

 2


[34m[1mwandb[0m: You chose 'Use an existing W&B account'
[34m[1mwandb[0m: Logging into https://api.wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: Find your API key here: https://wandb.ai/authorize
[34m[1mwandb[0m: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33melijahs2[0m ([33melijahs2-stanford-university[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

In [59]:
lightgcn_layers = 4
lightgcn_dim = 32
lightgcn_lr = 5e-4
lightgcn_batch_size = 2048

In [60]:
# ============================================================
# Cell 9: Train baseline LightGCN with W&B tracking
# ============================================================
import wandb

wandb.init(
    project="restaurant-recsys",
    name="LightGCN-normalized",
    config={
        "model": "LightGCN",
        "embedding_dim": lightgcn_dim,
        "layers": lightgcn_layers,
        "epochs": 10,
        "batch_size": lightgcn_batch_size,
        "learning_rate": lightgcn_lr,
        "num_users": num_users,
        "num_items": num_items,
        "item_feature_dim": item_features.shape[1],
        "user_feature_dim": user_features.shape[1],
    }
)

lgcn = LightGCN(
    num_users=num_users,
    num_items=num_items,
    user_feat_dim=user_features.shape[1],
    item_feat_dim=item_features.shape[1],
    dim=lightgcn_dim,
    n_layers=lightgcn_layers
).to(device)

lgcn, epoch_losses, batch_losses = train_bpr_model(
    lgcn,
    edge_index,
    train_df,
    epochs=10,
    batch_size=lightgcn_batch_size,
    lr=lightgcn_lr
)

wandb.finish()

LightGCN Epoch 1/10:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCN] Epoch 1: total_loss=50.4500


LightGCN Epoch 2/10:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCN] Epoch 2: total_loss=41.2661


LightGCN Epoch 3/10:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCN] Epoch 3: total_loss=37.2984


LightGCN Epoch 4/10:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCN] Epoch 4: total_loss=34.5390


LightGCN Epoch 5/10:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCN] Epoch 5: total_loss=32.5047


LightGCN Epoch 6/10:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCN] Epoch 6: total_loss=31.3363


LightGCN Epoch 7/10:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCN] Epoch 7: total_loss=29.9490


LightGCN Epoch 8/10:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCN] Epoch 8: total_loss=28.9442


LightGCN Epoch 9/10:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCN] Epoch 9: total_loss=28.2221


LightGCN Epoch 10/10:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCN] Epoch 10: total_loss=27.2043


In [61]:
# ============================================================
# Cell 10: Evaluation metrics and generic evaluator
# ============================================================
def recall_at_k(target, ranked_list, k):
    return int(target in ranked_list[:k])

def ndcg_at_k(target, ranked_list, k):
    if target in ranked_list[:k]:
        rank = ranked_list.index(target) + 1
        return 1.0 / np.log2(rank + 1)
    return 0.0

def mrr_metric(target, ranked_list):
    if target in ranked_list:
        return 1.0 / (ranked_list.index(target) + 1)
    return 0.0

def evaluate_lgcn_like(model, edge_index, eval_df, k=20):
    model.eval()
    with torch.no_grad():
        user_gcn, item_gcn = model(edge_index)

    recalls, ndcgs, mrrs = [], [], []

    for row in tqdm(eval_df.itertuples(), total=len(eval_df)):
        u = int(row.u)
        true_i = int(row.i)

        scores = (user_gcn[u] * item_gcn).sum(dim=1).cpu().numpy()
        ranked_items = list(np.argsort(-scores))

        recalls.append(recall_at_k(true_i, ranked_items, k))
        ndcgs.append(ndcg_at_k(true_i, ranked_items, k))
        mrrs.append(mrr_metric(true_i, ranked_items))

    return {
        "Recall@{}".format(k): float(np.mean(recalls)),
        "NDCG@{}".format(k): float(np.mean(ndcgs)),
        "MRR": float(np.mean(mrrs)),
    }

print("Baseline LightGCN on validation:")
metrics = evaluate_lgcn_like(lgcn, edge_index, val_df, k=20)
metrics


Baseline LightGCN on validation:


  0%|          | 0/34798 [00:00<?, ?it/s]

{'Recall@20': 0.06592332892694983,
 'NDCG@20': 0.02586577951871519,
 'MRR': 0.01980682703959846}

LightGCN Hyperparam Tuning

In [62]:
# ============================================================
# LightGCN Hyperparameter Tuning (Grid Search)
# ============================================================

import pandas as pd
import itertools

# Search spaces (tune as needed)
dim_grid        = [32, 64, 128]        # embedding sizes
layers_grid     = [3, 4]            # number of propagation layers
batch_grid      = [1024, 2048, 4096]   # batch sizes
lr_grid         = [1e-3, 5e-4]         # learning rates
epochs_val      = 5                   # or 20 if you prefer longer training

results_lgcn = []

for dim_val, layers_val, batch_val, lr_val in itertools.product(
        dim_grid, layers_grid, batch_grid, lr_grid):

    print(f"\n=== Training LightGCN (dim={dim_val}, layers={layers_val}, "
          f"batch={batch_val}, lr={lr_val}) ===")

    # Initialize a fresh LightGCN model
    model = LightGCN(
        num_users=num_users,
        num_items=num_items,
        user_feat_dim=user_features.shape[1],
        item_feat_dim=item_features.shape[1],
        dim=dim_val,
        n_layers=layers_val
    ).to(device)

    # Train model
    model, epoch_losses, batch_losses = train_bpr_model(
        model,
        edge_index,
        train_df,
        epochs=epochs_val,
        batch_size=batch_val,
        lr=lr_val
    )

    # Evaluate on validation set
    metrics_val = evaluate_lgcn_like(model, edge_index, val_df, k=20)

    # Store results
    results_lgcn.append({
        "dim": dim_val,
        "layers": layers_val,
        "batch_size": batch_val,
        "lr": lr_val,
        "Recall@20_val": metrics_val["Recall@20"],
        "NDCG@20_val": metrics_val["NDCG@20"],
        "MRR_val": metrics_val["MRR"],
    })

# Build and display results table
lightgcn_tuning_df = pd.DataFrame(results_lgcn)
display(lightgcn_tuning_df.sort_values("Recall@20_val", ascending=False))


=== Training LightGCN (dim=32, layers=3, batch=1024, lr=0.001) ===


LightGCN Epoch 1/5:   0%|          | 0/272 [00:00<?, ?it/s]

[LightGCN] Epoch 1: total_loss=83.7088


LightGCN Epoch 2/5:   0%|          | 0/272 [00:00<?, ?it/s]

[LightGCN] Epoch 2: total_loss=63.2408


LightGCN Epoch 3/5:   0%|          | 0/272 [00:00<?, ?it/s]

[LightGCN] Epoch 3: total_loss=56.0244


LightGCN Epoch 4/5:   0%|          | 0/272 [00:00<?, ?it/s]

[LightGCN] Epoch 4: total_loss=50.7501


LightGCN Epoch 5/5:   0%|          | 0/272 [00:00<?, ?it/s]

[LightGCN] Epoch 5: total_loss=47.6286


  0%|          | 0/34798 [00:00<?, ?it/s]


=== Training LightGCN (dim=32, layers=3, batch=1024, lr=0.0005) ===


LightGCN Epoch 1/5:   0%|          | 0/272 [00:00<?, ?it/s]

[LightGCN] Epoch 1: total_loss=91.7997


LightGCN Epoch 2/5:   0%|          | 0/272 [00:00<?, ?it/s]

[LightGCN] Epoch 2: total_loss=71.7037


LightGCN Epoch 3/5:   0%|          | 0/272 [00:00<?, ?it/s]

[LightGCN] Epoch 3: total_loss=63.9936


LightGCN Epoch 4/5:   0%|          | 0/272 [00:00<?, ?it/s]

[LightGCN] Epoch 4: total_loss=58.8841


LightGCN Epoch 5/5:   0%|          | 0/272 [00:00<?, ?it/s]

[LightGCN] Epoch 5: total_loss=55.6335


  0%|          | 0/34798 [00:00<?, ?it/s]


=== Training LightGCN (dim=32, layers=3, batch=2048, lr=0.001) ===


LightGCN Epoch 1/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCN] Epoch 1: total_loss=45.0833


LightGCN Epoch 2/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCN] Epoch 2: total_loss=34.6370


LightGCN Epoch 3/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCN] Epoch 3: total_loss=30.2689


LightGCN Epoch 4/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCN] Epoch 4: total_loss=28.1344


LightGCN Epoch 5/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCN] Epoch 5: total_loss=26.2765


  0%|          | 0/34798 [00:00<?, ?it/s]


=== Training LightGCN (dim=32, layers=3, batch=2048, lr=0.0005) ===


LightGCN Epoch 1/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCN] Epoch 1: total_loss=49.3136


LightGCN Epoch 2/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCN] Epoch 2: total_loss=39.5059


LightGCN Epoch 3/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCN] Epoch 3: total_loss=35.8134


LightGCN Epoch 4/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCN] Epoch 4: total_loss=32.7603


LightGCN Epoch 5/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCN] Epoch 5: total_loss=30.9928


  0%|          | 0/34798 [00:00<?, ?it/s]


=== Training LightGCN (dim=32, layers=3, batch=4096, lr=0.001) ===


LightGCN Epoch 1/5:   0%|          | 0/68 [00:00<?, ?it/s]

[LightGCN] Epoch 1: total_loss=24.4757


LightGCN Epoch 2/5:   0%|          | 0/68 [00:00<?, ?it/s]

[LightGCN] Epoch 2: total_loss=19.2377


LightGCN Epoch 3/5:   0%|          | 0/68 [00:00<?, ?it/s]

[LightGCN] Epoch 3: total_loss=16.9986


LightGCN Epoch 4/5:   0%|          | 0/68 [00:00<?, ?it/s]

[LightGCN] Epoch 4: total_loss=15.6871


LightGCN Epoch 5/5:   0%|          | 0/68 [00:00<?, ?it/s]

[LightGCN] Epoch 5: total_loss=14.7009


  0%|          | 0/34798 [00:00<?, ?it/s]


=== Training LightGCN (dim=32, layers=3, batch=4096, lr=0.0005) ===


LightGCN Epoch 1/5:   0%|          | 0/68 [00:00<?, ?it/s]

[LightGCN] Epoch 1: total_loss=26.1202


LightGCN Epoch 2/5:   0%|          | 0/68 [00:00<?, ?it/s]

[LightGCN] Epoch 2: total_loss=21.7568


LightGCN Epoch 3/5:   0%|          | 0/68 [00:00<?, ?it/s]

[LightGCN] Epoch 3: total_loss=19.4512


LightGCN Epoch 4/5:   0%|          | 0/68 [00:00<?, ?it/s]

[LightGCN] Epoch 4: total_loss=18.0449


LightGCN Epoch 5/5:   0%|          | 0/68 [00:00<?, ?it/s]

[LightGCN] Epoch 5: total_loss=17.0423


  0%|          | 0/34798 [00:00<?, ?it/s]


=== Training LightGCN (dim=32, layers=4, batch=1024, lr=0.001) ===


LightGCN Epoch 1/5:   0%|          | 0/272 [00:00<?, ?it/s]

[LightGCN] Epoch 1: total_loss=87.2268


LightGCN Epoch 2/5:   0%|          | 0/272 [00:00<?, ?it/s]

[LightGCN] Epoch 2: total_loss=66.1964


LightGCN Epoch 3/5:   0%|          | 0/272 [00:00<?, ?it/s]

[LightGCN] Epoch 3: total_loss=59.0080


LightGCN Epoch 4/5:   0%|          | 0/272 [00:00<?, ?it/s]

[LightGCN] Epoch 4: total_loss=54.9604


LightGCN Epoch 5/5:   0%|          | 0/272 [00:00<?, ?it/s]

[LightGCN] Epoch 5: total_loss=50.9905


  0%|          | 0/34798 [00:00<?, ?it/s]


=== Training LightGCN (dim=32, layers=4, batch=1024, lr=0.0005) ===


LightGCN Epoch 1/5:   0%|          | 0/272 [00:00<?, ?it/s]

[LightGCN] Epoch 1: total_loss=94.7229


LightGCN Epoch 2/5:   0%|          | 0/272 [00:00<?, ?it/s]

[LightGCN] Epoch 2: total_loss=75.1332


LightGCN Epoch 3/5:   0%|          | 0/272 [00:00<?, ?it/s]

[LightGCN] Epoch 3: total_loss=68.0731


LightGCN Epoch 4/5:   0%|          | 0/272 [00:00<?, ?it/s]

[LightGCN] Epoch 4: total_loss=62.8833


LightGCN Epoch 5/5:   0%|          | 0/272 [00:00<?, ?it/s]

[LightGCN] Epoch 5: total_loss=59.4203


  0%|          | 0/34798 [00:00<?, ?it/s]


=== Training LightGCN (dim=32, layers=4, batch=2048, lr=0.001) ===


LightGCN Epoch 1/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCN] Epoch 1: total_loss=46.8527


LightGCN Epoch 2/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCN] Epoch 2: total_loss=36.3290


LightGCN Epoch 3/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCN] Epoch 3: total_loss=32.2247


LightGCN Epoch 4/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCN] Epoch 4: total_loss=29.8994


LightGCN Epoch 5/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCN] Epoch 5: total_loss=28.3193


  0%|          | 0/34798 [00:00<?, ?it/s]


=== Training LightGCN (dim=32, layers=4, batch=2048, lr=0.0005) ===


LightGCN Epoch 1/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCN] Epoch 1: total_loss=50.4684


LightGCN Epoch 2/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCN] Epoch 2: total_loss=41.5869


LightGCN Epoch 3/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCN] Epoch 3: total_loss=37.3417


LightGCN Epoch 4/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCN] Epoch 4: total_loss=34.6109


LightGCN Epoch 5/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCN] Epoch 5: total_loss=32.5337


  0%|          | 0/34798 [00:00<?, ?it/s]


=== Training LightGCN (dim=32, layers=4, batch=4096, lr=0.001) ===


LightGCN Epoch 1/5:   0%|          | 0/68 [00:00<?, ?it/s]

[LightGCN] Epoch 1: total_loss=25.4422


LightGCN Epoch 2/5:   0%|          | 0/68 [00:00<?, ?it/s]

[LightGCN] Epoch 2: total_loss=20.3362


LightGCN Epoch 3/5:   0%|          | 0/68 [00:00<?, ?it/s]

[LightGCN] Epoch 3: total_loss=18.0295


LightGCN Epoch 4/5:   0%|          | 0/68 [00:00<?, ?it/s]

[LightGCN] Epoch 4: total_loss=16.6677


LightGCN Epoch 5/5:   0%|          | 0/68 [00:00<?, ?it/s]

[LightGCN] Epoch 5: total_loss=15.5065


  0%|          | 0/34798 [00:00<?, ?it/s]


=== Training LightGCN (dim=32, layers=4, batch=4096, lr=0.0005) ===


LightGCN Epoch 1/5:   0%|          | 0/68 [00:00<?, ?it/s]

[LightGCN] Epoch 1: total_loss=27.2416


LightGCN Epoch 2/5:   0%|          | 0/68 [00:00<?, ?it/s]

[LightGCN] Epoch 2: total_loss=22.4004


LightGCN Epoch 3/5:   0%|          | 0/68 [00:00<?, ?it/s]

[LightGCN] Epoch 3: total_loss=20.7772


LightGCN Epoch 4/5:   0%|          | 0/68 [00:00<?, ?it/s]

[LightGCN] Epoch 4: total_loss=19.2596


LightGCN Epoch 5/5:   0%|          | 0/68 [00:00<?, ?it/s]

[LightGCN] Epoch 5: total_loss=18.1374


  0%|          | 0/34798 [00:00<?, ?it/s]


=== Training LightGCN (dim=64, layers=3, batch=1024, lr=0.001) ===


LightGCN Epoch 1/5:   0%|          | 0/272 [00:00<?, ?it/s]

[LightGCN] Epoch 1: total_loss=76.5038


LightGCN Epoch 2/5:   0%|          | 0/272 [00:00<?, ?it/s]

[LightGCN] Epoch 2: total_loss=56.1616


LightGCN Epoch 3/5:   0%|          | 0/272 [00:00<?, ?it/s]

[LightGCN] Epoch 3: total_loss=47.8144


LightGCN Epoch 4/5:   0%|          | 0/272 [00:00<?, ?it/s]

[LightGCN] Epoch 4: total_loss=42.6170


LightGCN Epoch 5/5:   0%|          | 0/272 [00:00<?, ?it/s]

[LightGCN] Epoch 5: total_loss=38.7863


  0%|          | 0/34798 [00:00<?, ?it/s]


=== Training LightGCN (dim=64, layers=3, batch=1024, lr=0.0005) ===


LightGCN Epoch 1/5:   0%|          | 0/272 [00:00<?, ?it/s]

[LightGCN] Epoch 1: total_loss=84.5830


LightGCN Epoch 2/5:   0%|          | 0/272 [00:00<?, ?it/s]

[LightGCN] Epoch 2: total_loss=64.6599


LightGCN Epoch 3/5:   0%|          | 0/272 [00:00<?, ?it/s]

[LightGCN] Epoch 3: total_loss=56.8817


LightGCN Epoch 4/5:   0%|          | 0/272 [00:00<?, ?it/s]

[LightGCN] Epoch 4: total_loss=51.7869


LightGCN Epoch 5/5:   0%|          | 0/272 [00:00<?, ?it/s]

[LightGCN] Epoch 5: total_loss=48.1122


  0%|          | 0/34798 [00:00<?, ?it/s]


=== Training LightGCN (dim=64, layers=3, batch=2048, lr=0.001) ===


LightGCN Epoch 1/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCN] Epoch 1: total_loss=42.0136


LightGCN Epoch 2/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCN] Epoch 2: total_loss=31.1040


LightGCN Epoch 3/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCN] Epoch 3: total_loss=26.9851


LightGCN Epoch 4/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCN] Epoch 4: total_loss=24.3198


LightGCN Epoch 5/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCN] Epoch 5: total_loss=22.1589


  0%|          | 0/34798 [00:00<?, ?it/s]


=== Training LightGCN (dim=64, layers=3, batch=2048, lr=0.0005) ===


LightGCN Epoch 1/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCN] Epoch 1: total_loss=45.4415


LightGCN Epoch 2/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCN] Epoch 2: total_loss=35.4337


LightGCN Epoch 3/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCN] Epoch 3: total_loss=31.2907


LightGCN Epoch 4/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCN] Epoch 4: total_loss=28.8409


LightGCN Epoch 5/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCN] Epoch 5: total_loss=26.4688


  0%|          | 0/34798 [00:00<?, ?it/s]


=== Training LightGCN (dim=64, layers=3, batch=4096, lr=0.001) ===


LightGCN Epoch 1/5:   0%|          | 0/68 [00:00<?, ?it/s]

[LightGCN] Epoch 1: total_loss=22.7830


LightGCN Epoch 2/5:   0%|          | 0/68 [00:00<?, ?it/s]

[LightGCN] Epoch 2: total_loss=17.1531


LightGCN Epoch 3/5:   0%|          | 0/68 [00:00<?, ?it/s]

[LightGCN] Epoch 3: total_loss=15.1304


LightGCN Epoch 4/5:   0%|          | 0/68 [00:00<?, ?it/s]

[LightGCN] Epoch 4: total_loss=13.6327


LightGCN Epoch 5/5:   0%|          | 0/68 [00:00<?, ?it/s]

[LightGCN] Epoch 5: total_loss=12.7338


  0%|          | 0/34798 [00:00<?, ?it/s]


=== Training LightGCN (dim=64, layers=3, batch=4096, lr=0.0005) ===


LightGCN Epoch 1/5:   0%|          | 0/68 [00:00<?, ?it/s]

[LightGCN] Epoch 1: total_loss=24.9095


LightGCN Epoch 2/5:   0%|          | 0/68 [00:00<?, ?it/s]

[LightGCN] Epoch 2: total_loss=19.7852


LightGCN Epoch 3/5:   0%|          | 0/68 [00:00<?, ?it/s]

[LightGCN] Epoch 3: total_loss=17.5880


LightGCN Epoch 4/5:   0%|          | 0/68 [00:00<?, ?it/s]

[LightGCN] Epoch 4: total_loss=16.0485


LightGCN Epoch 5/5:   0%|          | 0/68 [00:00<?, ?it/s]

[LightGCN] Epoch 5: total_loss=15.0665


  0%|          | 0/34798 [00:00<?, ?it/s]


=== Training LightGCN (dim=64, layers=4, batch=1024, lr=0.001) ===


LightGCN Epoch 1/5:   0%|          | 0/272 [00:00<?, ?it/s]

[LightGCN] Epoch 1: total_loss=79.8226


LightGCN Epoch 2/5:   0%|          | 0/272 [00:00<?, ?it/s]

[LightGCN] Epoch 2: total_loss=59.6409


LightGCN Epoch 3/5:   0%|          | 0/272 [00:00<?, ?it/s]

[LightGCN] Epoch 3: total_loss=52.5594


LightGCN Epoch 4/5:   0%|          | 0/272 [00:00<?, ?it/s]

[LightGCN] Epoch 4: total_loss=46.9879


LightGCN Epoch 5/5:   0%|          | 0/272 [00:00<?, ?it/s]

[LightGCN] Epoch 5: total_loss=42.5875


  0%|          | 0/34798 [00:00<?, ?it/s]


=== Training LightGCN (dim=64, layers=4, batch=1024, lr=0.0005) ===


LightGCN Epoch 1/5:   0%|          | 0/272 [00:00<?, ?it/s]

[LightGCN] Epoch 1: total_loss=88.4165


LightGCN Epoch 2/5:   0%|          | 0/272 [00:00<?, ?it/s]

[LightGCN] Epoch 2: total_loss=69.2763


LightGCN Epoch 3/5:   0%|          | 0/272 [00:00<?, ?it/s]

[LightGCN] Epoch 3: total_loss=61.4038


LightGCN Epoch 4/5:   0%|          | 0/272 [00:00<?, ?it/s]

[LightGCN] Epoch 4: total_loss=56.7416


LightGCN Epoch 5/5:   0%|          | 0/272 [00:00<?, ?it/s]

[LightGCN] Epoch 5: total_loss=52.6804


  0%|          | 0/34798 [00:00<?, ?it/s]


=== Training LightGCN (dim=64, layers=4, batch=2048, lr=0.001) ===


LightGCN Epoch 1/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCN] Epoch 1: total_loss=43.0035


LightGCN Epoch 2/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCN] Epoch 2: total_loss=32.7148


LightGCN Epoch 3/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCN] Epoch 3: total_loss=28.7876


LightGCN Epoch 4/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCN] Epoch 4: total_loss=25.9458


LightGCN Epoch 5/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCN] Epoch 5: total_loss=24.0833


  0%|          | 0/34798 [00:00<?, ?it/s]


=== Training LightGCN (dim=64, layers=4, batch=2048, lr=0.0005) ===


LightGCN Epoch 1/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCN] Epoch 1: total_loss=46.7911


LightGCN Epoch 2/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCN] Epoch 2: total_loss=37.0510


LightGCN Epoch 3/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCN] Epoch 3: total_loss=33.0565


LightGCN Epoch 4/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCN] Epoch 4: total_loss=30.6181


LightGCN Epoch 5/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCN] Epoch 5: total_loss=28.6354


  0%|          | 0/34798 [00:00<?, ?it/s]


=== Training LightGCN (dim=64, layers=4, batch=4096, lr=0.001) ===


LightGCN Epoch 1/5:   0%|          | 0/68 [00:00<?, ?it/s]

[LightGCN] Epoch 1: total_loss=24.4775


LightGCN Epoch 2/5:   0%|          | 0/68 [00:00<?, ?it/s]

[LightGCN] Epoch 2: total_loss=18.8225


LightGCN Epoch 3/5:   0%|          | 0/68 [00:00<?, ?it/s]

[LightGCN] Epoch 3: total_loss=16.4180


LightGCN Epoch 4/5:   0%|          | 0/68 [00:00<?, ?it/s]

[LightGCN] Epoch 4: total_loss=15.0141


LightGCN Epoch 5/5:   0%|          | 0/68 [00:00<?, ?it/s]

[LightGCN] Epoch 5: total_loss=13.8141


  0%|          | 0/34798 [00:00<?, ?it/s]


=== Training LightGCN (dim=64, layers=4, batch=4096, lr=0.0005) ===


LightGCN Epoch 1/5:   0%|          | 0/68 [00:00<?, ?it/s]

[LightGCN] Epoch 1: total_loss=25.2482


LightGCN Epoch 2/5:   0%|          | 0/68 [00:00<?, ?it/s]

[LightGCN] Epoch 2: total_loss=20.6607


LightGCN Epoch 3/5:   0%|          | 0/68 [00:00<?, ?it/s]

[LightGCN] Epoch 3: total_loss=18.3085


LightGCN Epoch 4/5:   0%|          | 0/68 [00:00<?, ?it/s]

[LightGCN] Epoch 4: total_loss=16.8987


LightGCN Epoch 5/5:   0%|          | 0/68 [00:00<?, ?it/s]

[LightGCN] Epoch 5: total_loss=15.8527


  0%|          | 0/34798 [00:00<?, ?it/s]


=== Training LightGCN (dim=128, layers=3, batch=1024, lr=0.001) ===


LightGCN Epoch 1/5:   0%|          | 0/272 [00:00<?, ?it/s]

[LightGCN] Epoch 1: total_loss=74.8696


LightGCN Epoch 2/5:   0%|          | 0/272 [00:00<?, ?it/s]

[LightGCN] Epoch 2: total_loss=50.5678


LightGCN Epoch 3/5:   0%|          | 0/272 [00:00<?, ?it/s]

[LightGCN] Epoch 3: total_loss=41.8357


LightGCN Epoch 4/5:   0%|          | 0/272 [00:00<?, ?it/s]

[LightGCN] Epoch 4: total_loss=36.4772


LightGCN Epoch 5/5:   0%|          | 0/272 [00:00<?, ?it/s]

[LightGCN] Epoch 5: total_loss=32.1032


  0%|          | 0/34798 [00:00<?, ?it/s]


=== Training LightGCN (dim=128, layers=3, batch=1024, lr=0.0005) ===


LightGCN Epoch 1/5:   0%|          | 0/272 [00:00<?, ?it/s]

[LightGCN] Epoch 1: total_loss=79.1912


LightGCN Epoch 2/5:   0%|          | 0/272 [00:00<?, ?it/s]

[LightGCN] Epoch 2: total_loss=58.0486


LightGCN Epoch 3/5:   0%|          | 0/272 [00:00<?, ?it/s]

[LightGCN] Epoch 3: total_loss=50.2674


LightGCN Epoch 4/5:   0%|          | 0/272 [00:00<?, ?it/s]

[LightGCN] Epoch 4: total_loss=44.4981


LightGCN Epoch 5/5:   0%|          | 0/272 [00:00<?, ?it/s]

[LightGCN] Epoch 5: total_loss=40.9246


  0%|          | 0/34798 [00:00<?, ?it/s]


=== Training LightGCN (dim=128, layers=3, batch=2048, lr=0.001) ===


LightGCN Epoch 1/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCN] Epoch 1: total_loss=39.5050


LightGCN Epoch 2/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCN] Epoch 2: total_loss=27.6737


LightGCN Epoch 3/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCN] Epoch 3: total_loss=23.2918


LightGCN Epoch 4/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCN] Epoch 4: total_loss=20.5396


LightGCN Epoch 5/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCN] Epoch 5: total_loss=18.4713


  0%|          | 0/34798 [00:00<?, ?it/s]


=== Training LightGCN (dim=128, layers=3, batch=2048, lr=0.0005) ===


LightGCN Epoch 1/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCN] Epoch 1: total_loss=45.5532


LightGCN Epoch 2/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCN] Epoch 2: total_loss=33.0133


LightGCN Epoch 3/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCN] Epoch 3: total_loss=28.4322


LightGCN Epoch 4/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCN] Epoch 4: total_loss=25.9714


LightGCN Epoch 5/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCN] Epoch 5: total_loss=23.8752


  0%|          | 0/34798 [00:00<?, ?it/s]


=== Training LightGCN (dim=128, layers=3, batch=4096, lr=0.001) ===


LightGCN Epoch 1/5:   0%|          | 0/68 [00:00<?, ?it/s]

[LightGCN] Epoch 1: total_loss=24.2796


LightGCN Epoch 2/5:   0%|          | 0/68 [00:00<?, ?it/s]

[LightGCN] Epoch 2: total_loss=16.3856


LightGCN Epoch 3/5:   0%|          | 0/68 [00:00<?, ?it/s]

[LightGCN] Epoch 3: total_loss=13.8820


LightGCN Epoch 4/5:   0%|          | 0/68 [00:00<?, ?it/s]

[LightGCN] Epoch 4: total_loss=12.3369


LightGCN Epoch 5/5:   0%|          | 0/68 [00:00<?, ?it/s]

[LightGCN] Epoch 5: total_loss=11.1814


  0%|          | 0/34798 [00:00<?, ?it/s]


=== Training LightGCN (dim=128, layers=3, batch=4096, lr=0.0005) ===


LightGCN Epoch 1/5:   0%|          | 0/68 [00:00<?, ?it/s]

[LightGCN] Epoch 1: total_loss=25.4690


LightGCN Epoch 2/5:   0%|          | 0/68 [00:00<?, ?it/s]

[LightGCN] Epoch 2: total_loss=18.7270


LightGCN Epoch 3/5:   0%|          | 0/68 [00:00<?, ?it/s]

[LightGCN] Epoch 3: total_loss=16.4120


LightGCN Epoch 4/5:   0%|          | 0/68 [00:00<?, ?it/s]

[LightGCN] Epoch 4: total_loss=14.7771


LightGCN Epoch 5/5:   0%|          | 0/68 [00:00<?, ?it/s]

[LightGCN] Epoch 5: total_loss=13.7316


  0%|          | 0/34798 [00:00<?, ?it/s]


=== Training LightGCN (dim=128, layers=4, batch=1024, lr=0.001) ===


LightGCN Epoch 1/5:   0%|          | 0/272 [00:00<?, ?it/s]

[LightGCN] Epoch 1: total_loss=76.3856


LightGCN Epoch 2/5:   0%|          | 0/272 [00:00<?, ?it/s]

[LightGCN] Epoch 2: total_loss=54.2550


LightGCN Epoch 3/5:   0%|          | 0/272 [00:00<?, ?it/s]

[LightGCN] Epoch 3: total_loss=45.0475


LightGCN Epoch 4/5:   0%|          | 0/272 [00:00<?, ?it/s]

[LightGCN] Epoch 4: total_loss=39.7645


LightGCN Epoch 5/5:   0%|          | 0/272 [00:00<?, ?it/s]

[LightGCN] Epoch 5: total_loss=35.6960


  0%|          | 0/34798 [00:00<?, ?it/s]


=== Training LightGCN (dim=128, layers=4, batch=1024, lr=0.0005) ===


LightGCN Epoch 1/5:   0%|          | 0/272 [00:00<?, ?it/s]

[LightGCN] Epoch 1: total_loss=84.0307


LightGCN Epoch 2/5:   0%|          | 0/272 [00:00<?, ?it/s]

[LightGCN] Epoch 2: total_loss=62.4238


LightGCN Epoch 3/5:   0%|          | 0/272 [00:00<?, ?it/s]

[LightGCN] Epoch 3: total_loss=54.6174


LightGCN Epoch 4/5:   0%|          | 0/272 [00:00<?, ?it/s]

[LightGCN] Epoch 4: total_loss=49.1813


LightGCN Epoch 5/5:   0%|          | 0/272 [00:00<?, ?it/s]

[LightGCN] Epoch 5: total_loss=45.3242


  0%|          | 0/34798 [00:00<?, ?it/s]


=== Training LightGCN (dim=128, layers=4, batch=2048, lr=0.001) ===


LightGCN Epoch 1/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCN] Epoch 1: total_loss=42.2242


LightGCN Epoch 2/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCN] Epoch 2: total_loss=30.1047


LightGCN Epoch 3/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCN] Epoch 3: total_loss=25.8220


LightGCN Epoch 4/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCN] Epoch 4: total_loss=22.8000


LightGCN Epoch 5/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCN] Epoch 5: total_loss=20.8220


  0%|          | 0/34798 [00:00<?, ?it/s]


=== Training LightGCN (dim=128, layers=4, batch=2048, lr=0.0005) ===


LightGCN Epoch 1/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCN] Epoch 1: total_loss=46.4427


LightGCN Epoch 2/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCN] Epoch 2: total_loss=34.8442


LightGCN Epoch 3/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCN] Epoch 3: total_loss=30.5640


LightGCN Epoch 4/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCN] Epoch 4: total_loss=27.8243


LightGCN Epoch 5/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCN] Epoch 5: total_loss=25.9205


  0%|          | 0/34798 [00:00<?, ?it/s]


=== Training LightGCN (dim=128, layers=4, batch=4096, lr=0.001) ===


LightGCN Epoch 1/5:   0%|          | 0/68 [00:00<?, ?it/s]

[LightGCN] Epoch 1: total_loss=23.0598


LightGCN Epoch 2/5:   0%|          | 0/68 [00:00<?, ?it/s]

[LightGCN] Epoch 2: total_loss=16.6660


LightGCN Epoch 3/5:   0%|          | 0/68 [00:00<?, ?it/s]

[LightGCN] Epoch 3: total_loss=14.4615


LightGCN Epoch 4/5:   0%|          | 0/68 [00:00<?, ?it/s]

[LightGCN] Epoch 4: total_loss=13.0305


LightGCN Epoch 5/5:   0%|          | 0/68 [00:00<?, ?it/s]

[LightGCN] Epoch 5: total_loss=11.9277


  0%|          | 0/34798 [00:00<?, ?it/s]


=== Training LightGCN (dim=128, layers=4, batch=4096, lr=0.0005) ===


LightGCN Epoch 1/5:   0%|          | 0/68 [00:00<?, ?it/s]

[LightGCN] Epoch 1: total_loss=25.5879


LightGCN Epoch 2/5:   0%|          | 0/68 [00:00<?, ?it/s]

[LightGCN] Epoch 2: total_loss=19.8506


LightGCN Epoch 3/5:   0%|          | 0/68 [00:00<?, ?it/s]

[LightGCN] Epoch 3: total_loss=17.3299


LightGCN Epoch 4/5:   0%|          | 0/68 [00:00<?, ?it/s]

[LightGCN] Epoch 4: total_loss=15.8701


LightGCN Epoch 5/5:   0%|          | 0/68 [00:00<?, ?it/s]

[LightGCN] Epoch 5: total_loss=14.9016


  0%|          | 0/34798 [00:00<?, ?it/s]

Unnamed: 0,dim,layers,batch_size,lr,Recall@20_val,NDCG@20_val,MRR_val
20,64,4,2048,0.001,0.067906,0.025947,0.019349
4,32,3,4096,0.001,0.066469,0.02596,0.019693
33,128,4,2048,0.0005,0.066354,0.025606,0.01942
11,32,4,4096,0.0005,0.066268,0.0263,0.020095
2,32,3,2048,0.001,0.066153,0.025393,0.019176
7,32,4,1024,0.0005,0.066067,0.025915,0.019805
22,64,4,4096,0.001,0.066067,0.025573,0.019279
8,32,4,2048,0.001,0.066038,0.025947,0.019873
10,32,4,4096,0.001,0.065923,0.025976,0.019837
9,32,4,2048,0.0005,0.065349,0.025803,0.019755


Now we move to extended models!

LightGCL

In [63]:
dim = 64
layers = 3
epochs = 10
lr = 5e-4

In [64]:
# ============================================================
# LightGCL: SVD Global View + Contrastive Setup (Train-only)
# ============================================================

from scipy.sparse import coo_matrix
from scipy.sparse.linalg import svds
import torch.nn.functional as F

# --- 1. Build implicit matrix from TRAIN ONLY ---
rows = train_df["u"].values
cols = train_df["i"].values
data = np.ones_like(rows, dtype=np.float32)

M_train = coo_matrix((data, (rows, cols)), shape=(num_users, num_items))
print("M_train shape:", M_train.shape)

# --- 2. SVD rank MUST match LightGCN embedding dimension ---
# dim is the embedding dimension you used for LightGCN
k_svd = dim                       # <<< IMPORTANT: match model dimension >>>
k_svd = min(k_svd, min(num_users, num_items) - 1)

U, s, Vt = svds(M_train, k=k_svd)
idx = np.argsort(-s)
U, s, Vt = U[:, idx], s[idx], Vt[idx, :]

Sigma_sqrt = np.sqrt(s)

P_svd = U * Sigma_sqrt          # (num_users, k_svd)
Q_svd = (Vt.T) * Sigma_sqrt     # (num_items, k_svd)

svd_user_emb = torch.tensor(P_svd, dtype=torch.float32, device=device)
svd_item_emb = torch.tensor(Q_svd, dtype=torch.float32, device=device)

print("svd_user_emb:", svd_user_emb.shape)
print("svd_item_emb:", svd_item_emb.shape)

# --- 3. Normalize & detach SVD embeddings for stability ---
svd_u_norm = F.normalize(svd_user_emb, dim=-1).detach()
svd_i_norm = F.normalize(svd_item_emb, dim=-1).detach()

# --- 4. LightGCL hyperparameters (will be tuned later) ---
tau         = 0.03    # temperature (by tuning)
lambda_con  = 0.1    # contrastive weight (by tuning)
lambda_reg  = 1e-4   # L2 weight (on embeddings)


def contrastive_loss(z, z_tilde, tau=0.2):
    """
    InfoNCE loss: z, z_tilde are (N, d) with aligned rows.
    """
    z = F.normalize(z, dim=-1)
    z_tilde = F.normalize(z_tilde, dim=-1)

    logits = z @ z_tilde.T / tau    # (N, N)
    labels = torch.arange(z.size(0), device=z.device)
    return F.cross_entropy(logits, labels)

M_train shape: (25840, 2922)
svd_user_emb: torch.Size([25840, 64])
svd_item_emb: torch.Size([2922, 64])


In [65]:
# ============================================================
# LightGCL: Step function (BPR + Contrastive)
# ============================================================

def lightgcl_step(model, edge_index, batch_users, batch_pos_items):
    # GCN forward
    user_gcn, item_gcn = model(edge_index)

    # Base BPR loss (exactly as LightGCN)
    loss_bpr = bpr_loss(model, edge_index, batch_users, batch_pos_items)

    # Per-user alignment
    u_embed = user_gcn[batch_users]          # (B, d)
    u_svd   = svd_u_norm[batch_users]        # (B, d)

    # Per-item alignment
    i_embed = item_gcn[batch_pos_items]      # (B, d)
    i_svd   = svd_i_norm[batch_pos_items]    # (B, d)

    L_con_u = contrastive_loss(u_embed, u_svd, tau=tau)
    L_con_i = contrastive_loss(i_embed, i_svd, tau=tau)

    # L2 regularization on learnable embeddings
    reg = 0.5 * (
        model.user_emb.weight.norm(2)**2 +
        model.item_emb.weight.norm(2)**2
    )

    return loss_bpr + lambda_con * (L_con_u + L_con_i) + lambda_reg * reg


# ============================================================
# LightGCL: Training loop (no wandb inside)
# ============================================================

def train_lightgcl(model, edge_index, train_df,
                   epochs=5, batch_size=1024, lr=1e-3):
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    train_users = torch.tensor(train_df["u"].values, dtype=torch.long, device=device)
    train_items = torch.tensor(train_df["i"].values, dtype=torch.long, device=device)

    epoch_losses, batch_losses = [], []

    for epoch in range(epochs):
        perm = torch.randperm(len(train_users), device=device)
        epoch_loss = 0.0

        pbar = tqdm(perm.split(batch_size), desc=f"LightGCL Epoch {epoch+1}/{epochs}")

        for step, idx in enumerate(pbar):
            batch_u = train_users[idx]
            batch_i = train_items[idx]

            loss = lightgcl_step(model, edge_index, batch_u, batch_i)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            bl = loss.item()
            batch_losses.append(bl)
            epoch_loss += bl

            pbar.set_postfix({
                "batch_loss": f"{bl:.4f}",
                "epoch_loss": f"{epoch_loss:.4f}"
            })

        epoch_losses.append(epoch_loss)
        print(f"[LightGCL] Epoch {epoch+1}: total_loss={epoch_loss:.4f}")

    return model, epoch_losses, batch_losses

In [66]:
# ============================================================
# Train LightGCL with W&B Tracking
# ============================================================

wandb.init(
    project="restaurant-recsys",
    name="LightGCL",
    config={
        "model": "LightGCL",
        "epochs": epochs,
        "batch_size": 2048,
        "lr": lr,
        "embedding_dim": dim,
        "layers": layers
    }
)

lgcl = LightGCN(
    num_users=num_users,
    num_items=num_items,
    user_feat_dim=user_features.shape[1],
    item_feat_dim=item_features.shape[1],
    dim=dim,
    n_layers=layers,
).to(device)

lgcl, lgcl_epoch_losses, lgcl_batch_losses = train_lightgcl(
    lgcl, edge_index, train_df,
    epochs=epochs, batch_size=2048, lr=lr
)

wandb.finish()

LightGCL Epoch 1/10:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCL] Epoch 1: total_loss=230.9260


LightGCL Epoch 2/10:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCL] Epoch 2: total_loss=131.2981


LightGCL Epoch 3/10:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCL] Epoch 3: total_loss=101.6959


LightGCL Epoch 4/10:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCL] Epoch 4: total_loss=87.3142


LightGCL Epoch 5/10:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCL] Epoch 5: total_loss=78.4554


LightGCL Epoch 6/10:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCL] Epoch 6: total_loss=72.7084


LightGCL Epoch 7/10:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCL] Epoch 7: total_loss=68.3462


LightGCL Epoch 8/10:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCL] Epoch 8: total_loss=64.9468


LightGCL Epoch 9/10:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCL] Epoch 9: total_loss=62.1737


LightGCL Epoch 10/10:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCL] Epoch 10: total_loss=60.0745


In [67]:
# ============================================================
# LightGCL Hyperparameter Tuning (grid search)
# ============================================================

import pandas as pd
import itertools

# define search grid
tau_grid        = [0.03, 0.05, 0.1]
lambda_con_grid = [0.03, 0.05, 0.1]
lambda_reg_val  = 1e-4

results = []

for tau_val, lambda_con_val in itertools.product(tau_grid, lambda_con_grid):
    print(f"\n=== Training LightGCL with tau={tau_val}, lambda_con={lambda_con_val} ===")

    # set globals used by lightgcl_step
    tau        = tau_val
    lambda_con = lambda_con_val
    lambda_reg = lambda_reg_val

    # (re)initialize model
    model = LightGCN(
        num_users=num_users,
        num_items=num_items,
        user_feat_dim=user_features.shape[1],
        item_feat_dim=item_features.shape[1],
        dim=dim,
        n_layers=layers,
    ).to(device)

    # train model
    model, epoch_losses, batch_losses = train_lightgcl(
        model, edge_index, train_df,
        epochs=5, batch_size=2048, lr=1e-3
    )

    # evaluate on validation set
    metrics_val = evaluate_lgcn_like(model, edge_index, val_df, k=20)

    run_result = {
        "tau": tau_val,
        "lambda_con": lambda_con_val,
        "Recall@20_val": metrics_val["Recall@20"],
        "NDCG@20_val": metrics_val["NDCG@20"],
        "MRR_val": metrics_val["MRR"],
    }
    results.append(run_result)

# build comparison table
lightgcl_tuning_df = pd.DataFrame(results)
display(lightgcl_tuning_df.sort_values("Recall@20_val", ascending=False))


=== Training LightGCL with tau=0.03, lambda_con=0.03 ===


LightGCL Epoch 1/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCL] Epoch 1: total_loss=91.6453


LightGCL Epoch 2/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCL] Epoch 2: total_loss=57.7802


LightGCL Epoch 3/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCL] Epoch 3: total_loss=47.7148


LightGCL Epoch 4/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCL] Epoch 4: total_loss=42.0634


LightGCL Epoch 5/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCL] Epoch 5: total_loss=38.6533


  0%|          | 0/34798 [00:00<?, ?it/s]


=== Training LightGCL with tau=0.03, lambda_con=0.05 ===


LightGCL Epoch 1/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCL] Epoch 1: total_loss=118.1773


LightGCL Epoch 2/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCL] Epoch 2: total_loss=69.8219


LightGCL Epoch 3/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCL] Epoch 3: total_loss=56.9477


LightGCL Epoch 4/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCL] Epoch 4: total_loss=50.0666


LightGCL Epoch 5/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCL] Epoch 5: total_loss=45.6262


  0%|          | 0/34798 [00:00<?, ?it/s]


=== Training LightGCL with tau=0.03, lambda_con=0.1 ===


LightGCL Epoch 1/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCL] Epoch 1: total_loss=184.0272


LightGCL Epoch 2/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCL] Epoch 2: total_loss=98.1734


LightGCL Epoch 3/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCL] Epoch 3: total_loss=77.8633


LightGCL Epoch 4/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCL] Epoch 4: total_loss=68.5307


LightGCL Epoch 5/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCL] Epoch 5: total_loss=62.9207


  0%|          | 0/34798 [00:00<?, ?it/s]


=== Training LightGCL with tau=0.05, lambda_con=0.03 ===


LightGCL Epoch 1/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCL] Epoch 1: total_loss=87.6599


LightGCL Epoch 2/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCL] Epoch 2: total_loss=60.6049


LightGCL Epoch 3/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCL] Epoch 3: total_loss=51.5172


LightGCL Epoch 4/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCL] Epoch 4: total_loss=46.2146


LightGCL Epoch 5/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCL] Epoch 5: total_loss=42.8116


  0%|          | 0/34798 [00:00<?, ?it/s]


=== Training LightGCL with tau=0.05, lambda_con=0.05 ===


LightGCL Epoch 1/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCL] Epoch 1: total_loss=113.1676


LightGCL Epoch 2/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCL] Epoch 2: total_loss=75.1372


LightGCL Epoch 3/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCL] Epoch 3: total_loss=63.2926


LightGCL Epoch 4/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCL] Epoch 4: total_loss=56.7394


LightGCL Epoch 5/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCL] Epoch 5: total_loss=52.6984


  0%|          | 0/34798 [00:00<?, ?it/s]


=== Training LightGCL with tau=0.05, lambda_con=0.1 ===


LightGCL Epoch 1/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCL] Epoch 1: total_loss=175.5112


LightGCL Epoch 2/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCL] Epoch 2: total_loss=107.1343


LightGCL Epoch 3/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCL] Epoch 3: total_loss=88.1694


LightGCL Epoch 4/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCL] Epoch 4: total_loss=79.1516


LightGCL Epoch 5/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCL] Epoch 5: total_loss=73.8556


  0%|          | 0/34798 [00:00<?, ?it/s]


=== Training LightGCL with tau=0.1, lambda_con=0.03 ===


LightGCL Epoch 1/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCL] Epoch 1: total_loss=92.1355


LightGCL Epoch 2/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCL] Epoch 2: total_loss=71.4944


LightGCL Epoch 3/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCL] Epoch 3: total_loss=64.1254


LightGCL Epoch 4/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCL] Epoch 4: total_loss=59.6650


LightGCL Epoch 5/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCL] Epoch 5: total_loss=56.5839


  0%|          | 0/34798 [00:00<?, ?it/s]


=== Training LightGCL with tau=0.1, lambda_con=0.05 ===


LightGCL Epoch 1/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCL] Epoch 1: total_loss=122.5306


LightGCL Epoch 2/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCL] Epoch 2: total_loss=94.1567


LightGCL Epoch 3/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCL] Epoch 3: total_loss=83.9373


LightGCL Epoch 4/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCL] Epoch 4: total_loss=77.9568


LightGCL Epoch 5/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCL] Epoch 5: total_loss=74.1533


  0%|          | 0/34798 [00:00<?, ?it/s]


=== Training LightGCL with tau=0.1, lambda_con=0.1 ===


LightGCL Epoch 1/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCL] Epoch 1: total_loss=192.3032


LightGCL Epoch 2/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCL] Epoch 2: total_loss=144.4103


LightGCL Epoch 3/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCL] Epoch 3: total_loss=127.6983


LightGCL Epoch 4/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCL] Epoch 4: total_loss=119.0759


LightGCL Epoch 5/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCL] Epoch 5: total_loss=113.9279


  0%|          | 0/34798 [00:00<?, ?it/s]

Unnamed: 0,tau,lambda_con,Recall@20_val,NDCG@20_val,MRR_val
8,0.1,0.1,0.064745,0.023357,0.016997
2,0.03,0.1,0.063509,0.02289,0.016894
0,0.03,0.03,0.062848,0.023064,0.017099
1,0.03,0.05,0.062791,0.022443,0.016434
4,0.05,0.05,0.062417,0.022378,0.016452
5,0.05,0.1,0.06236,0.022202,0.01627
6,0.1,0.03,0.061239,0.022316,0.016581
7,0.1,0.05,0.060521,0.022044,0.016401
3,0.05,0.03,0.06032,0.022046,0.016524


In [68]:
# ============================================================
# Evaluate LightGCL on validation set + W&B logging
# ============================================================

import wandb

wandb.init(
    project="restaurant-recsys",
    name="LightGCL-eval",
    config={
        "stage": "evaluation",
        "model": "LightGCL"
    }
)

print("Evaluating LightGCL on validation set...\n")

metrics_lgcl_val = evaluate_lgcn_like(lgcl, edge_index, val_df, k=20)

wandb.log({
    "LightGCL/Recall@20_val": metrics_lgcl_val["Recall@20"],
    "LightGCL/NDCG@20_val":  metrics_lgcl_val["NDCG@20"],
    "LightGCL/MRR_val":      metrics_lgcl_val["MRR"],
})

print("LightGCL Validation Results:")
print(metrics_lgcl_val)

wandb.finish()

Evaluating LightGCL on validation set...



  0%|          | 0/34798 [00:00<?, ?it/s]

LightGCL Validation Results:
{'Recall@20': 0.06549226967067073, 'NDCG@20': 0.024039693254372684, 'MRR': 0.01776855520617207}


0,1
LightGCL/MRR_val,▁
LightGCL/NDCG@20_val,▁
LightGCL/Recall@20_val,▁

0,1
LightGCL/MRR_val,0.01777
LightGCL/NDCG@20_val,0.02404
LightGCL/Recall@20_val,0.06549


LightGCL+Geo

In [69]:
# LightGCL+Geo hyperparameters

beta = 0.1
rho = 0.1  # decay scale for distance
tau=0.1
lambda_con=0.05
lambda_reg_val = 1e-4
mu_geo=0.01

# Radius-aware negatives
R = 0.1        # tune as needed
alpha = 0.75   # popularity exponent

# Geo-smooth neighbors
k_nn = 10
rho_r = 0.1
mu_geo = 0.05

dim = 64
layers = 4
epochs = 5
lr = 5e-4

In [70]:
# ============================================================
# Geo Precomputation for LightGCL+Geo
# ============================================================

from sklearn.metrics import pairwise_distances

# Raw coords from TRAIN (not normalized features)
df_train_geo = full_df_train.copy()
df_train_geo["latitude"]  = pd.to_numeric(df_train_geo["latitude"],  errors="coerce")
df_train_geo["longitude"] = pd.to_numeric(df_train_geo["longitude"], errors="coerce")

# Item coordinates (num_items, 2)
item_coord_df = (
    df_train_geo.groupby("i")[["latitude", "longitude"]]
    .mean()
    .reindex(range(num_items))
    .fillna(0.0)
)
item_coord = item_coord_df.values

# User "home" coordinates (num_users, 2)
user_coord_df = (
    df_train_geo.groupby("u")[["latitude", "longitude"]]
    .mean()
    .reindex(range(num_users))
    .fillna(0.0)
)
user_coord = user_coord_df.values

def geo_distance(u, i):
    du = user_coord[int(u)]
    di = item_coord[int(i)]
    return float(np.linalg.norm(du - di))

item_pop = (
    full_df_train["i"].value_counts()
    .reindex(range(num_items))
    .fillna(0)
    .values
)

user_geo_candidates = {}
for u in range(num_users):
    dists = np.linalg.norm(item_coord - user_coord[u], axis=1)
    mask = dists <= R
    cand = np.where(mask)[0].tolist()
    cand = [j for j in cand if j not in user_pos_train.get(u, [])]
    if not cand:
        cand = [j for j in range(num_items) if j not in user_pos_train.get(u, [])]
    user_geo_candidates[u] = np.array(cand, dtype=int)

def sample_geo_negative(u, k=1):
    cand = user_geo_candidates[int(u)]
    weights = item_pop[cand] ** alpha
    if weights.sum() > 0:
        probs = weights / weights.sum()
    else:
        probs = None
    return np.random.choice(cand, size=k, p=probs, replace=True)

D_items = pairwise_distances(item_coord)
item_neighbors = []
item_neighbor_weights = []

for i in range(num_items):
    idx = np.argsort(D_items[i])[:k_nn + 1]
    idx = idx[1:]
    item_neighbors.append(idx)
    w = np.exp(-D_items[i, idx] / rho_r)
    item_neighbor_weights.append(w)

def geo_smooth_loss(item_gcn):
    loss = 0.0
    for i in range(num_items):
        neigh = item_neighbors[i]
        if len(neigh) == 0:
            continue
        qi = item_gcn[i]
        qj = item_gcn[neigh]
        w = torch.tensor(
            item_neighbor_weights[i], dtype=torch.float32, device=item_gcn.device
        )
        diff = qi.unsqueeze(0) - qj
        loss += (w * (diff ** 2).sum(dim=1)).sum()
    return loss / num_items

def bpr_geo_terms(model, edge_index, batch_users, batch_pos_items):
    user_gcn, item_gcn = model(edge_index)

    # radius-aware negatives
    neg_items_np = np.concatenate(
        [sample_geo_negative(u.item(), 1) for u in batch_users]
    )
    neg_items = torch.tensor(
        neg_items_np, dtype=torch.long, device=batch_users.device
    )

    pos_base = (user_gcn[batch_users] * item_gcn[batch_pos_items]).sum(dim=-1)
    neg_base = (user_gcn[batch_users] * item_gcn[neg_items]).sum(dim=-1)

    pos_d = torch.tensor(
        [geo_distance(u, i) for u, i in zip(batch_users, batch_pos_items)],
        dtype=torch.float32,
        device=pos_base.device,
    )
    neg_d = torch.tensor(
        [geo_distance(u, j) for u, j in zip(batch_users, neg_items)],
        dtype=torch.float32,
        device=neg_base.device,
    )

    pos_score = pos_base + beta * torch.exp(-pos_d / rho)
    neg_score = neg_base + beta * torch.exp(-neg_d / rho)

    loss_bpr_geo = -torch.log(torch.sigmoid(pos_score - neg_score)).mean()
    loss_geo_smooth = geo_smooth_loss(item_gcn)

    return loss_bpr_geo, loss_geo_smooth, user_gcn, item_gcn

In [71]:
# ============================================================
# LightGCL+Geo Step (BPR_geo + Contrastive + Smoothness)
# ============================================================

def lightgcl_geo_step(model, edge_index, batch_users, batch_pos_items):
    # Geo-aware BPR part
    loss_bpr_geo, loss_geo_smooth, user_gcn, item_gcn = bpr_geo_terms(
        model, edge_index, batch_users, batch_pos_items
    )

    # Contrastive alignment (same as LightGCL)
    u_embed = user_gcn[batch_users]
    u_svd   = svd_u_norm[batch_users]

    i_embed = item_gcn[batch_pos_items]
    i_svd   = svd_i_norm[batch_pos_items]

    L_con_u = contrastive_loss(u_embed, u_svd, tau=tau)
    L_con_i = contrastive_loss(i_embed, i_svd, tau=tau)

    reg = 0.5 * (
        model.user_emb.weight.norm(2)**2 +
        model.item_emb.weight.norm(2)**2
    )

    return (
        loss_bpr_geo +
        lambda_con*(L_con_u + L_con_i) +
        mu_geo * loss_geo_smooth +
        lambda_reg * reg
    )


# ============================================================
# LightGCL+Geo Training Loop
# ============================================================

def train_lightgcl_geo(model, edge_index, train_df, epochs=5, batch_size=1024, lr=1e-3):
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    train_users = torch.tensor(train_df["u"].values, dtype=torch.long, device=device)
    train_items = torch.tensor(train_df["i"].values, dtype=torch.long, device=device)

    epoch_losses, batch_losses = [], []

    for epoch in range(epochs):

        perm = torch.randperm(len(train_users), device=device)
        epoch_loss = 0.0

        pbar = tqdm(perm.split(batch_size), desc=f"LightGCL+Geo Epoch {epoch+1}/{epochs}")

        for step, idx in enumerate(pbar):
            batch_u = train_users[idx]
            batch_i = train_items[idx]

            loss = lightgcl_geo_step(model, edge_index, batch_u, batch_i)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            bl = loss.item()
            batch_losses.append(bl)
            epoch_loss += bl

            pbar.set_postfix({"batch_loss": f"{bl:.4f}", "epoch_loss": f"{epoch_loss:.4f}"})

            wandb.log({
                "LightGCL_Geo/batch_loss": bl,
                "LightGCL_Geo/epoch": epoch + 1
            })

        epoch_losses.append(epoch_loss)
        wandb.log({"LightGCL_Geo/epoch_loss": epoch_loss})

        print(f"[LightGCL+Geo] Epoch {epoch+1}: total_loss={epoch_loss:.4f}")

    return model, epoch_losses, batch_losses

In [72]:
# ============================================================
# Train LightGCL+Geo with W&B Tracking
# ============================================================

import wandb

wandb.init(
    project="restaurant-recsys",
    name="LightGCL_Geo",
    config={
        "model": "LightGCL_Geo",
        "embedding_dim": dim,
        "layers": layers,
        "epochs": epochs,
        "batch_size": 2048,
        "learning_rate": lr,
        "tau": tau,
        "lambda_con": lambda_con,
        "lambda_reg": lambda_reg,
        "mu_geo": mu_geo,
    }
)

# initialize model
lgcl_geo = LightGCN(
    num_users=num_users,
    num_items=num_items,
    user_feat_dim=user_features.shape[1],
    item_feat_dim=item_features.shape[1],
    dim=dim,
    n_layers=layers,
).to(device)

# train model
lgcl_geo, lgcl_geo_epoch_losses, lgcl_geo_batch_losses = train_lightgcl_geo(
    lgcl_geo,
    edge_index,
    train_df,
    epochs=epochs,
    batch_size=2048,
    lr=lr
)

wandb.finish()

LightGCL+Geo Epoch 1/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCL+Geo] Epoch 1: total_loss=238.8286


LightGCL+Geo Epoch 2/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCL+Geo] Epoch 2: total_loss=187.5290


LightGCL+Geo Epoch 3/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCL+Geo] Epoch 3: total_loss=170.1316


LightGCL+Geo Epoch 4/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCL+Geo] Epoch 4: total_loss=160.8897


LightGCL+Geo Epoch 5/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCL+Geo] Epoch 5: total_loss=154.1253


0,1
LightGCL_Geo/batch_loss,█▇▅▄▄▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
LightGCL_Geo/epoch,▁▁▁▁▁▁▁▁▁▃▃▃▃▃▃▅▅▅▅▅▅▅▅▅▅▆▆▆▆▆██████████
LightGCL_Geo/epoch_loss,█▄▂▂▁

0,1
LightGCL_Geo/batch_loss,1.09741
LightGCL_Geo/epoch,5.0
LightGCL_Geo/epoch_loss,154.1253


In [73]:
# ============================================================
# Evaluate LightGCL+Geo (lgcl_geo) + W&B logging
# ============================================================

import wandb

wandb.init(
    project="restaurant-recsys",
    name="LightGCLgeo-eval",
    config={
        "stage": "evaluation",
        "model": "LightGCL_Geo"
    }
)

print("Evaluating LightGCL+Geo on validation set...\n")

metrics_lgcl_geo_val = evaluate_lgcn_like(lgcl_geo, edge_index, val_df, k=20)

wandb.log({
    "LightGCL_Geo/Recall@20_val": metrics_lgcl_geo_val["Recall@20"],
    "LightGCL_Geo/NDCG@20_val":  metrics_lgcl_geo_val["NDCG@20"],
    "LightGCL_Geo/MRR_val":      metrics_lgcl_geo_val["MRR"],
})

print("LightGCL+Geo Validation Results:")
print(metrics_lgcl_geo_val)

Evaluating LightGCL+Geo on validation set...



  0%|          | 0/34798 [00:00<?, ?it/s]

LightGCL+Geo Validation Results:
{'Recall@20': 0.0147422265647451, 'NDCG@20': 0.005838113737866626, 'MRR': 0.0054732539445139115}


In [74]:
# ============================================================
# Model Comparison Table (Auto-Logged)
# ============================================================

comparison_df = pd.DataFrame({
    "LightGCL":      metrics_lgcl_val,
    "LightGCL+Geo":  metrics_lgcl_geo_val,
}).T  # transpose so rows = models

print("\nModel Comparison Table:")
print(comparison_df)

# Log to W&B as a table
wandb.log({"model_comparison": wandb.Table(dataframe=comparison_df)})


Model Comparison Table:
              Recall@20   NDCG@20       MRR
LightGCL       0.065492  0.024040  0.017769
LightGCL+Geo   0.014742  0.005838  0.005473


In [None]:
# Hyperparam tuning

In [76]:
# ============================================================
# LightGCL+Geo Hyperparameter Tuning (Grid Search)
# ============================================================

import pandas as pd
import itertools

# Define search grid
tau_grid        = [0.03, 0.05]          # you can expand to [0.03, 0.05, 0.1]
lambda_con_grid = [0.05]           # same structure as LightGCL
mu_geo_grid     = [0.01]     # typical geo weights
lambda_reg_val  = 1e-4                  # fixed, same as LightGCL

results_geo = []

for tau_val, lambda_con_val, mu_geo_val in itertools.product(
        tau_grid, lambda_con_grid, mu_geo_grid):

    print(f"\n=== Training LightGCL+Geo with "
          f"tau={tau_val}, lambda_con={lambda_con_val}, mu_geo={mu_geo_val} ===")

    # Set GLOBALS used by lightgcl_geo_step
    tau        = tau_val
    lambda_con = lambda_con_val
    mu_geo     = mu_geo_val
    lambda_reg = lambda_reg_val

    # Reinitialize model
    model_geo = LightGCN(
        num_users=num_users,
        num_items=num_items,
        user_feat_dim=user_features.shape[1],
        item_feat_dim=item_features.shape[1],
        dim=dim,
        n_layers=layers,
    ).to(device)

    # Train
    model_geo, epoch_losses, batch_losses = train_lightgcl_geo(
        model_geo, edge_index, train_df,
        epochs=5, batch_size=2048, lr=1e-3
    )

    # Validation evaluation
    metrics_val = evaluate_lgcn_like(model_geo, edge_index, val_df, k=20)

    # Store run results
    run_result = {
        "tau": tau_val,
        "lambda_con": lambda_con_val,
        "mu_geo": mu_geo_val,
        "Recall@20_val": metrics_val["Recall@20"],
        "NDCG@20_val": metrics_val["NDCG@20"],
        "MRR_val": metrics_val["MRR"],
    }

    results_geo.append(run_result)

# Build result table
lightgcl_geo_tuning_df = pd.DataFrame(results_geo)
display(lightgcl_geo_tuning_df.sort_values("Recall@20_val", ascending=False))



=== Training LightGCL+Geo with tau=0.03, lambda_con=0.05, mu_geo=0.01 ===


LightGCL+Geo Epoch 1/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCL+Geo] Epoch 1: total_loss=202.1866


LightGCL+Geo Epoch 2/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCL+Geo] Epoch 2: total_loss=143.3848


LightGCL+Geo Epoch 3/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCL+Geo] Epoch 3: total_loss=117.6581


LightGCL+Geo Epoch 4/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCL+Geo] Epoch 4: total_loss=108.2811


LightGCL+Geo Epoch 5/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCL+Geo] Epoch 5: total_loss=104.4661


  0%|          | 0/34798 [00:00<?, ?it/s]


=== Training LightGCL+Geo with tau=0.05, lambda_con=0.05, mu_geo=0.01 ===


LightGCL+Geo Epoch 1/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCL+Geo] Epoch 1: total_loss=182.7761


LightGCL+Geo Epoch 2/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCL+Geo] Epoch 2: total_loss=139.6497


LightGCL+Geo Epoch 3/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCL+Geo] Epoch 3: total_loss=115.6540


LightGCL+Geo Epoch 4/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCL+Geo] Epoch 4: total_loss=106.4836


LightGCL+Geo Epoch 5/5:   0%|          | 0/136 [00:00<?, ?it/s]

[LightGCL+Geo] Epoch 5: total_loss=111.8844


  0%|          | 0/34798 [00:00<?, ?it/s]

Unnamed: 0,tau,lambda_con,mu_geo,Recall@20_val,NDCG@20_val,MRR_val
1,0.05,0.05,0.01,0.055147,0.019907,0.014684
0,0.03,0.05,0.01,0.024283,0.009762,0.008346


Contrasting Models

Real PinSage

In [77]:
# ============================================================
# Random-walk based neighbor sampler for PinSage
# ============================================================
import numpy as np
import torch
from collections import defaultdict

# Build item-item adjacency from co-reviews (undirected)
adj = [set() for _ in range(num_items)]

for u, g in train_df.groupby("u"):
    items = g["i"].values
    items = np.unique(items)
    for a in items:
        for b in items:
            if a != b:
                adj[a].add(b)
                adj[b].add(a)

# Turn sets into lists for faster indexing
adj = [list(neigh) for neigh in adj]

def build_rw_neighbors(
    adj,
    num_items,
    num_walks=10,
    walk_length=5,
    num_neighbors=50,
    seed=42
):
    rng = np.random.RandomState(seed)
    neigh_idx = np.zeros((num_items, num_neighbors), dtype=np.int64)
    neigh_w   = np.zeros((num_items, num_neighbors), dtype=np.float32)

    all_items = np.arange(num_items)

    for i in range(num_items):
        visits = defaultdict(int)

        if len(adj[i]) == 0:
            # no adjacency: fall back to random neighbors
            chosen = rng.choice(all_items[all_items != i], size=num_neighbors, replace=True)
            weights = np.ones_like(chosen, dtype=np.float32)
        else:
            # random walks from item i
            for _ in range(num_walks):
                cur = i
                for _ in range(walk_length):
                    neighs = adj[cur]
                    if not neighs:
                        break
                    nxt = rng.choice(neighs)
                    visits[nxt] += 1
                    cur = nxt

            if len(visits) == 0:
                chosen = rng.choice(all_items[all_items != i], size=num_neighbors, replace=True)
                weights = np.ones_like(chosen, dtype=np.float32)
            else:
                items_vis = np.array(list(visits.keys()))
                counts    = np.array([visits[k] for k in items_vis], dtype=np.float32)
                order = np.argsort(-counts)
                items_vis = items_vis[order]
                counts    = counts[order]

                if len(items_vis) >= num_neighbors:
                    chosen  = items_vis[:num_neighbors]
                    weights = counts[:num_neighbors]
                else:
                    pad_size = num_neighbors - len(items_vis)
                    pad_items = rng.choice(all_items[all_items != i], size=pad_size, replace=True)
                    chosen  = np.concatenate([items_vis, pad_items])
                    weights = np.concatenate([counts, np.ones(pad_size, dtype=np.float32)])

        weights = weights + 1e-8
        weights = weights / weights.sum()

        neigh_idx[i] = chosen
        neigh_w[i]   = weights

    return neigh_idx, neigh_w

print("Building random-walk neighbors for PinSage...")
rw_neigh_idx_np, rw_neigh_w_np = build_rw_neighbors(
    adj,
    num_items,
    num_walks=10,      # you can tune
    walk_length=5,     # you can tune
    num_neighbors=50,  # you can tune
    seed=42
)

rw_neigh_idx = torch.tensor(rw_neigh_idx_np, dtype=torch.long, device=device)
rw_neigh_w   = torch.tensor(rw_neigh_w_np,   dtype=torch.float32, device=device)

print("Random-walk neighbors built:",
      rw_neigh_idx.shape, rw_neigh_w.shape)

Building random-walk neighbors for PinSage...
Random-walk neighbors built: torch.Size([2922, 50]) torch.Size([2922, 50])


In [80]:
# ============================================================
# PinSage with random-walk importance pooling
# ============================================================
import torch.nn as nn
import torch.nn.functional as F

class RWPinSageLayer(nn.Module):
    """
    One PinSage-style layer:
    - importance-weighted neighbor aggregation
    - optional residual from self representation
    """
    def __init__(self, in_dim, out_dim, use_residual=True):
        super().__init__()
        self.proj = nn.Linear(in_dim, out_dim)
        self.use_residual = use_residual

    def forward(self, x, neigh_idx, neigh_w):
        """
        x:           (N, d)
        neigh_idx:   (N, T)
        neigh_w:     (N, T)
        """
        neigh_emb = x[neigh_idx]              # (N, T, d)
        w = neigh_w.unsqueeze(-1)             # (N, T, 1)
        agg = (neigh_emb * w).sum(dim=1)      # (N, d)

        if self.use_residual:
            agg = agg + x

        h = self.proj(agg)
        return F.relu(h)

class RWPinSageEncoder(nn.Module):
    def __init__(self, in_dim, hidden_dim=64, num_layers=2):
        super().__init__()
        self.input_proj = nn.Linear(in_dim, hidden_dim)
        self.layers = nn.ModuleList([
            RWPinSageLayer(hidden_dim, hidden_dim, use_residual=True)
            for _ in range(num_layers)
        ])

    def forward(self, x, neigh_idx, neigh_w):
        h = F.relu(self.input_proj(x))
        for layer in self.layers:
            h = layer(h, neigh_idx, neigh_w)
        return F.normalize(h, dim=1)

class RWPinSageRecommender(nn.Module):
    def __init__(self, in_dim, hidden_dim=64, num_layers=2):
        super().__init__()
        self.encoder = RWPinSageEncoder(in_dim, hidden_dim, num_layers)

    def forward(self, item_x, neigh_idx, neigh_w):
        return self.encoder(item_x, neigh_idx, neigh_w)

In [81]:
# ============================================================
# Train RW-PinSage with BPR
# ============================================================

def train_pinsage_rw(
    item_features,
    neigh_idx,
    neigh_w,
    epochs=5,
    hidden_dim=64,
    num_layers=2,
    batch_size=2048,
    lr=5e-4,
    max_pos_per_user=10,
    max_steps_per_epoch=200
):
    model = RWPinSageRecommender(item_features.size(1), hidden_dim, num_layers).to(device)
    opt = torch.optim.Adam(model.parameters(), lr=lr)

    # user -> list of positive items
    user_pos = train_df.groupby("u")["i"].apply(list).to_dict()
    train_user_ids = np.array(list(user_pos.keys()))

    def sample_triplet(bs):
        users = np.random.choice(train_user_ids, size=bs)
        pos = np.array([np.random.choice(user_pos[u]) for u in users])
        neg = np.random.randint(0, num_items, size=bs)
        return (
            torch.tensor(users, device=device, dtype=torch.long),
            torch.tensor(pos,    device=device, dtype=torch.long),
            torch.tensor(neg,    device=device, dtype=torch.long),
        )

    for ep in range(1, epochs+1):
        total = 0.0
        steps = min(max(1, len(train_df)//batch_size), max_steps_per_epoch)

        for _ in range(steps):
            u, i, j = sample_triplet(batch_size)

            # full-item embeddings for this step
            item_emb = model(item_features, neigh_idx, neigh_w)  # (num_items, d)

            # build user embedding as mean of some positives
            u_np = u.cpu().numpy()
            u_repr_list = []
            for uid in u_np:
                pos_items = user_pos[int(uid)]
                if len(pos_items) > max_pos_per_user:
                    pos_sample = np.random.choice(pos_items, size=max_pos_per_user, replace=False)
                else:
                    pos_sample = pos_items
                u_repr_list.append(item_emb[pos_sample].mean(0))
            u_repr = torch.stack(u_repr_list, dim=0)  # (B, d)

            pos_scores = (u_repr * item_emb[i]).sum(1)
            neg_scores = (u_repr * item_emb[j]).sum(1)

            loss = -F.logsigmoid(pos_scores - neg_scores).mean()

            opt.zero_grad()
            loss.backward()
            opt.step()

            total += loss.detach().item()

        print(f"[RW-PinSage] epoch {ep} loss={total/steps:.4f}")

    return model

In [82]:
# cache user->pos items once
pinsage_user_pos = train_df.groupby("u")["i"].apply(list).to_dict()

def evaluate_pinsage_rw(item_emb, eval_df, k=20):
    recalls, ndcgs, mrrs = [], [], []

    for row in eval_df.itertuples():
        u = int(row.u)
        true_i = int(row.i)

        pos_items = pinsage_user_pos.get(u, [])
        if len(pos_items) == 0:
            continue

        u_emb = item_emb[pos_items].mean(0)
        scores = (u_emb * item_emb).sum(dim=1).cpu().numpy()
        ranked = list(np.argsort(-scores))

        recalls.append(true_i in ranked[:k])
        ndcgs.append(ndcg_at_k(true_i, ranked, k))
        mrrs.append(mrr_metric(true_i, ranked))

    return {
        "Recall@20": float(np.mean(recalls)),
        "NDCG@20": float(np.mean(ndcgs)),
        "MRR": float(np.mean(mrrs)),
    }

In [83]:
rw_pinsage_model = train_pinsage_rw(
    item_features=item_features,
    neigh_idx=rw_neigh_idx,
    neigh_w=rw_neigh_w,
    epochs=5,
    hidden_dim=64,
    num_layers=2,
    batch_size=2048,
    lr=5e-4,
    max_steps_per_epoch=150
)

with torch.no_grad():
    rw_item_emb = rw_pinsage_model(item_features, rw_neigh_idx, rw_neigh_w)

rw_val_metrics = evaluate_pinsage_rw(rw_item_emb, val_df, k=20)
rw_val_metrics

[RW-PinSage] epoch 1 loss=0.5631
[RW-PinSage] epoch 2 loss=0.5410
[RW-PinSage] epoch 3 loss=0.5387
[RW-PinSage] epoch 4 loss=0.5403
[RW-PinSage] epoch 5 loss=0.5457


{'Recall@20': 0.03459338819919096,
 'NDCG@20': 0.012295280384647419,
 'MRR': 0.010416364149921779}

In [84]:
rw_pinsage_model = train_pinsage_rw(
    item_features=item_features,
    neigh_idx=rw_neigh_idx,
    neigh_w=rw_neigh_w,
    epochs=5,
    hidden_dim=128,
    num_layers=2,
    batch_size=2048,
    lr=5e-4,
    max_steps_per_epoch=200
)

with torch.no_grad():
    rw_item_emb = rw_pinsage_model(item_features, rw_neigh_idx, rw_neigh_w)

rw_val_metrics = evaluate_pinsage_rw(rw_item_emb, val_df, k=20)
rw_val_metrics

[RW-PinSage] epoch 1 loss=0.5571
[RW-PinSage] epoch 2 loss=0.5387
[RW-PinSage] epoch 3 loss=0.5364
[RW-PinSage] epoch 4 loss=0.5347
[RW-PinSage] epoch 5 loss=0.5349


{'Recall@20': 0.03710419863300321,
 'NDCG@20': 0.01325433044103468,
 'MRR': 0.01133937320209415}