In [1]:
import os

from google.colab import drive

# Make Google Drive visible to this runtime under /content/drive.
drive.mount('/content/drive')

# IMPORTANT: Update this path to where you uploaded the 'Google-Colab' folder!
# Example: If you uploaded it to MyDrive -> HardGNN_Project -> Google-Colab
# the path would be '/content/drive/MyDrive/HardGNN_Project/Google-Colab'
project_path = '/content/drive/MyDrive/Google-Colab'

# Later cells import Params, DataHandler, Utils and HardGNN_model by bare name,
# so the project folder is made the working directory.
os.chdir(project_path)

# Show where we ended up and what is there, so a wrong path is spotted early.
print(f"Current working directory: {os.getcwd()}")
print("Files in current directory:")
for entry_name in os.listdir(os.curdir):
    print(entry_name)

Mounted at /content/drive
Current working directory: /content/drive/MyDrive/Google-Colab
Files in current directory:
fallback_files
Datasets
__pycache__
Utils
Experimentation.md
DataHandler.py
HardGNN_Colab_Script.py
HardGNN_Jupyter_Notebook_Guide.md
HardGNN_model.py
IMPLEMENTATION_SUMMARY.md
main.py
Params.py
README.md
VERIFY_COLAB_READY.py
requirements_final.txt


In [2]:
# ========================================================================
# HardGNN: Hard Negative Sampling Enhanced SelfGNN for Google Colab
# ========================================================================
#
# This script adds hard negative sampling to the validated SelfGNN configurations.
# Copy each section into separate Colab cells as indicated by the comments.
#
# Configuration: Uses validated parameters + Hard Negative Sampling (τ=0.1, K=5, λ=0.1)
# ========================================================================

# ========================================================================
# Environment Setup and Installation
# ========================================================================

"""
🚀 HardGNN: Hard Negative Sampling Enhanced SelfGNN

This notebook adds hard negative sampling to validated SelfGNN configurations:
- Uses proven hyperparameters for each dataset
- Adds InfoNCE contrastive loss (τ=0.1, K=5, λ=0.1)
- Dataset-agnostic design
- GPU acceleration on Google Colab

## 📋 Setup Instructions:
1. Runtime → Change runtime type → GPU (T4, A100, or V100)
2. Set DATASET parameter below to your desired dataset
3. Run cells in order - dependencies will be installed automatically
4. Monitor training - logs show contrastive loss alongside standard metrics
"""

# ========================================================================
# 🔧 CONFIGURE YOUR EXPERIMENT HERE
# ========================================================================
# DATASET is passed to configure_dataset() in the "Dataset Configuration" cell,
# which lower-cases it before picking a preset, and it is interpolated into the
# log / progress messages of the later cells.
DATASET = 'gowalla'  # Options: 'yelp', 'amazon', 'gowalla', 'movielens'
# ========================================================================

In [3]:
# ========================================================================
# CELL 1: Environment Setup - PRIORITIZING COLAB DEFAULTS for TF/NumPy
# ========================================================================
import subprocess
import sys
import os
import site

def _captured_text(stream):
    """Return captured pip output as stripped text, or '' when nothing was captured.

    The stdout/stderr attributes of ``subprocess`` exceptions may be ``None``
    (nothing collected) or raw ``bytes`` (a timeout does not decode output even
    with ``text=True``), so both cases are normalised here.
    """
    if stream is None:
        return ""
    if isinstance(stream, bytes):
        stream = stream.decode("utf-8", errors="replace")
    return stream.strip()


def install_missing_dependencies():
    """Install or upgrade the non-ML helper packages into the user site.

    NumPy, TensorFlow and ml-dtypes are deliberately not touched so that the
    runtime's pre-installed builds are the ones imported afterwards. Every
    requirement gets its own ``pip install --upgrade --user`` call limited to
    180 seconds; a failure or timeout is reported and the remaining
    requirements are still processed (best effort, nothing is raised).

    Afterwards the user site directory is put first on ``sys.path`` and
    ``~/.local/bin`` (when it exists) is prepended to ``PATH``.

    Returns:
        None. Progress and problems are printed.
    """
    print("🔄 Installing/upgrading non-ML core dependencies for HardGNN...")
    print(f"📍 Detected Python version: {sys.version}")

    # We will NOT install numpy, tensorflow, or ml-dtypes via pip.
    # We rely on Colab's pre-installed versions.
    dependencies = [
        "matplotlib>=3.5.0",
        "scipy>=1.12.0",
        "protobuf>=3.19.0,<4.25.0",
        "pandas>=1.3.0",
        "scikit-learn>=1.0.0"
    ]

    print("🔄 Attempting to install/upgrade non-ML core dependencies to user site...")
    for dep in dependencies:
        print(f"📦 Processing {dep}...")
        try:
            # Using --upgrade will install if not present, or upgrade if it is.
            # --user to keep it in user space.
            command = [sys.executable, "-m", "pip", "install", "--no-cache-dir", "--upgrade", "--user", dep]
            print(f"   Executing: {' '.join(command)}")
            subprocess.run(command, check=True, capture_output=True, text=True, timeout=180)
            print(f"✅ Successfully processed {dep}.")
        except subprocess.CalledProcessError as e:
            print(f"⚠️ Warning: Could not process {dep}. Pip stdout: {_captured_text(e.stdout)}. Pip stderr: {_captured_text(e.stderr)}")
        except subprocess.TimeoutExpired as e:
            # On timeout the captured streams may be None or bytes; calling
            # .strip() on them directly would raise from inside this handler
            # and abort the whole setup cell.
            print(f"⚠️ Timeout: Processing of {dep} took too long. Pip stdout: {_captured_text(e.stdout)}. Pip stderr: {_captured_text(e.stderr)}")
        except Exception as e:
            print(f"⚠️ An unexpected error occurred processing {dep}: {e}")

    print("✅ Non-ML core dependency processing complete.")
    try:
        # Ensure user site packages are in path
        if hasattr(site, 'USER_SITE') and site.USER_SITE and site.USER_SITE not in sys.path:
            print(f"Adding {site.USER_SITE} to sys.path (priority 0)")
            sys.path.insert(0, site.USER_SITE)
        # For Colab/Linux, also consider adding local/bin to PATH if it exists for any pip installed CLIs
        local_bin_path = os.path.expanduser("~/.local/bin")
        current_path = os.environ.get('PATH', '')
        # Compare whole PATH entries: a substring test would treat
        # "/x/.local/bin-old" as if "/x/.local/bin" were already present.
        if os.path.isdir(local_bin_path) and local_bin_path not in current_path.split(os.pathsep):
            print(f"Adding {local_bin_path} to PATH")
            os.environ['PATH'] = local_bin_path + os.pathsep + current_path if current_path else local_bin_path
    except Exception as e:
        print(f"⚠️ Could not robustly update sys.path/PATH for user site: {e}")

def setup_tensorflow_compatibility(tf_module, numpy_module):
    """Put the given TensorFlow module into TF1 graph mode and report on the stack.

    Args:
        tf_module: The imported TensorFlow module, or a falsy value when the
            import failed.
        numpy_module: The imported NumPy module, or a falsy value when the
            import failed.

    Returns:
        True when eager execution and v2 behaviour were disabled without an
        error (GPU memory growth is attempted per detected GPU on the way);
        False when either module is missing or the switch raised.
    """
    print("🔧 Setting up TensorFlow compatibility...")
    tf_label = tf_module.__version__ if tf_module else 'N/A'
    print(f"📍 Using TensorFlow version: {tf_label}")
    np_label = numpy_module.__version__ if numpy_module else 'N/A'
    print(f"📍 Using NumPy version: {np_label}")

    if not (tf_module and numpy_module):
        print("❌ CRITICAL: TensorFlow or NumPy module not available. Cannot proceed with setup.")
        return False

    # Purely informational: which TensorFlow range usually pairs with this NumPy major.
    numpy_is_2x = np_label.startswith("2.")
    numpy_is_1x = np_label.startswith("1.")
    if numpy_is_2x or numpy_is_1x:
        tf_range_hint = ">=2.16" if numpy_is_2x else "<=2.15"
        print(f"ℹ️ NumPy version is {np_label}. Colab's TensorFlow ({tf_label}) should be compatible (e.g., {tf_range_hint} or specially built)." )
    else:
        print(f"⚠️ Unknown NumPy version pattern: {np_label}")

    # ml_dtypes is optional from this script's point of view; only warn on a
    # combination the messages below describe as possibly not ideal.
    try:
        import ml_dtypes
        ml_version = ml_dtypes.__version__
        print(f"📍 ml_dtypes version found: {ml_version} (from {ml_dtypes.__file__})")
        ml_is_04_or_05 = ml_version.startswith(("0.4", "0.5"))
        numpy_series = None
        if numpy_is_2x and not ml_is_04_or_05:
            numpy_series, expected_ml = "2.x", "0.4.x or 0.5.x"
        elif numpy_is_1x and ml_is_04_or_05:
            numpy_series, expected_ml = "1.x", "<0.4.x"
        if numpy_series is not None:
            print(f"   ⚠️ WARNING: ml_dtypes version ({ml_version}) might not be ideal for NumPy {numpy_series} (expected {expected_ml}). Check for runtime issues.")
    except ImportError:
        print("ℹ️ ml_dtypes not explicitly found or importable by script. TensorFlow might bundle it or manage it internally.")
    except Exception as ml_check_error:
        print(f"⚠️ Error during ml_dtypes check: {ml_check_error}")

    try:
        tf_v1 = tf_module.compat.v1
        tf_v1.disable_eager_execution()
        tf_v1.disable_v2_behavior()
        print("✅ TensorFlow (Colab's default) configured for v1 compatibility mode.")

        detected_gpus = tf_module.config.list_physical_devices('GPU')
        if not detected_gpus:
            print("⚠️ No GPU detected, will use CPU.")
            return True

        print(f"🚀 GPU acceleration available: {len(detected_gpus)} GPU(s) detected")
        for device in detected_gpus:
            print(f"   - {device}")
            try:
                tf_module.config.experimental.set_memory_growth(device, True)
                print(f"✅ GPU memory growth configured for {device}")
            except RuntimeError as growth_error:
                # Reported but not fatal: the GPU stays usable without memory growth.
                print(f"⚠️ Could not configure GPU memory growth for {device}: {growth_error}")
        return True
    except AttributeError as attr_error:
        print(f"❌ AttributeError during TensorFlow v1 compatibility setup: {attr_error}")
        print(f"   This can happen if Colab's TensorFlow version ({tf_label}) is too old, or has an unexpected structure, or is incompatible with its NumPy ({np_label}).")
        return False
    except Exception as setup_error:
        print(f"❌ Error setting up TensorFlow v1 compatibility layer: {setup_error}")
        return False

def verify_colab_environment(tf_module, numpy_module):
    """Print a diagnostic snapshot of the runtime; never fails on missing extras.

    Reports the Python build, the head of ``sys.path``, the platform, the
    versions and locations of the supplied NumPy / TensorFlow modules, whether
    ``ml_dtypes`` and ``tensorflow_metadata`` are importable, and (when
    ``psutil`` is installed) RAM and free disk space.

    This runs right before the setup cell decides whether to raise, so it must
    not raise itself and hide the real setup error; a missing ``psutil`` is
    therefore reported instead of propagated.

    Args:
        tf_module: The imported TensorFlow module, or a falsy value if it
            could not be imported.
        numpy_module: The imported NumPy module, or a falsy value if it could
            not be imported.

    Returns:
        True, always.
    """
    import sys
    import platform

    print("🔍 Verifying Google Colab Environment (using Colab defaults for TF/NumPy)...")
    print(f"📍 Python: {sys.version}")
    print(f"📍 sys.path (first few entries): {str(sys.path[:5])}")
    print(f"📍 Platform: {platform.platform()}")
    print(f"📍 Architecture: {platform.machine()}")

    if numpy_module:
        print(f"📍 NumPy Version (loaded): {numpy_module.__version__}")
        print(f"📍 NumPy Path: {numpy_module.__file__}")
    else:
        print("📍 NumPy Version (loaded): NOT LOADED")

    if tf_module:
        print(f"📍 TensorFlow Version (loaded): {tf_module.__version__}")
        print(f"📍 TensorFlow Path: {tf_module.__file__}")
    else:
        print("📍 TensorFlow Version (loaded): NOT LOADED")

    try:
        import ml_dtypes
        print(f"📍 ml_dtypes Version (loaded): {ml_dtypes.__version__} from {ml_dtypes.__file__}")
    except Exception:
        print("📍 ml_dtypes: Not found by script or error during import check (may be internal to TF).")

    try:
        import tensorflow_metadata # Check if it's part of Colab's default TF environment
        print(f"📍 tensorflow-metadata Version (loaded): {tensorflow_metadata.__version__} from {tensorflow_metadata.__file__}")
    except Exception:
        print("📍 tensorflow-metadata: Not found by script or error (may not be needed or part of default TF).")

    try:
        import psutil
    except ImportError:
        print("📍 RAM / disk statistics skipped: psutil is not installed.")
        return True

    bytes_per_gb = 1024 ** 3
    memory = psutil.virtual_memory()
    # `total` is the installed RAM; `available` is what can still be handed out.
    # The old single line labelled the total as "Available".
    print(f"📍 Total RAM: {memory.total / bytes_per_gb:.1f} GB")
    print(f"📍 Available RAM: {memory.available / bytes_per_gb:.1f} GB")
    disk = psutil.disk_usage('/')
    print(f"📍 Available disk space: {disk.free / bytes_per_gb:.1f} GB")
    return True

# --- Main Execution Flow ---
_banner = "=" * 60
print(_banner)
print("🚀 HardGNN Setup for Google Colab Pro+ (PRIORITIZING COLAB DEFAULTS for TF/NumPy)")
print(_banner)

# Step 1: helper packages only -- NumPy / TensorFlow are never installed by pip here.
install_missing_dependencies()

# Step 2: import the runtime's own NumPy and TensorFlow. This deliberately
# happens after the pip step and after the sys.path changes it makes.
# Both names are read by later cells, so they always exist (None on failure).
numpy_to_use = None
tensorflow_to_use = None

print("🔄 Importing Colab's default NumPy (post any other pip installs)...")
try:
    import numpy
    numpy_to_use = numpy
    print(f"✅ NumPy version loaded: {numpy_to_use.__version__} from {numpy_to_use.__file__}")
except Exception as numpy_import_error:
    print(f"❌ FAILED to import Colab's default NumPy: {numpy_import_error}")
    print("   This is a critical failure. Further steps will likely fail.")

print("🔄 Importing Colab's default TensorFlow (post any other pip installs)...")
try:
    import tensorflow
    tensorflow_to_use = tensorflow
    print(f"✅ TensorFlow version loaded: {tensorflow_to_use.__version__} from {tensorflow_to_use.__file__}")
except Exception as tf_import_error:
    print(f"❌ FAILED to import Colab's default TensorFlow: {tf_import_error}")
    print("   This could be due to an underlying issue with its dependencies (like the loaded NumPy version) or Colab environment configuration.")

# Step 3: switch TensorFlow to TF1 behaviour, but only if both imports worked.
# A failed import is not raised yet, so that the diagnostics in step 4 still run.
_core_modules_loaded = bool(numpy_to_use and tensorflow_to_use)
setup_successful = False
if _core_modules_loaded:
    setup_successful = setup_tensorflow_compatibility(tf_module=tensorflow_to_use, numpy_module=numpy_to_use)
else:
    print("⚠️ CRITICAL FAILURE: Could not import Colab's default NumPy or TensorFlow. Environment setup will likely be incomplete or fail.")
    print("Skipping TensorFlow compatibility setup as core modules (NumPy/TensorFlow) failed to load.")

# Step 4: always print the environment report, even when something above failed.
verify_colab_environment(tf_module=tensorflow_to_use, numpy_module=numpy_to_use)

# Step 5: stop the notebook here if the environment is unusable; the message
# depends on whether the imports or the compatibility switch went wrong.
if not setup_successful:
    if _core_modules_loaded:
        raise RuntimeError("❌ TensorFlow setup failed using Colab's default versions. There might be an incompatibility within the pre-built Colab environment, or the TF1 compatibility layer cannot be applied to the loaded versions.")
    raise RuntimeError("❌ TensorFlow/NumPy native import failed. Cannot configure environment.")

print("✅ Environment setup attempt complete using Colab's default TF/NumPy (or best effort)!")
print(_banner)

🚀 HardGNN Setup for Google Colab Pro+ (PRIORITIZING COLAB DEFAULTS for TF/NumPy)
🔄 Installing/upgrading non-ML core dependencies for HardGNN...
📍 Detected Python version: 3.11.12 (main, Apr  9 2025, 08:55:54) [GCC 11.4.0]
🔄 Attempting to install/upgrade non-ML core dependencies to user site...
📦 Processing matplotlib>=3.5.0...
   Executing: /usr/bin/python3 -m pip install --no-cache-dir --upgrade --user matplotlib>=3.5.0
✅ Successfully processed matplotlib>=3.5.0.
📦 Processing scipy>=1.12.0...
   Executing: /usr/bin/python3 -m pip install --no-cache-dir --upgrade --user scipy>=1.12.0
✅ Successfully processed scipy>=1.12.0.
📦 Processing protobuf>=3.19.0,<4.25.0...
   Executing: /usr/bin/python3 -m pip install --no-cache-dir --upgrade --user protobuf>=3.19.0,<4.25.0
✅ Successfully processed protobuf>=3.19.0,<4.25.0.
📦 Processing pandas>=1.3.0...
   Executing: /usr/bin/python3 -m pip install --no-cache-dir --upgrade --user pandas>=1.3.0
✅ Successfully processed pandas>=1.3.0.
📦 Processing

Instructions for updating:
non-resource variables are not supported in the long term


✅ TensorFlow version loaded: 2.18.0 from /usr/local/lib/python3.11/dist-packages/tensorflow/__init__.py
🔧 Setting up TensorFlow compatibility...
📍 Using TensorFlow version: 2.18.0
📍 Using NumPy version: 2.0.2
ℹ️ NumPy version is 2.0.2. Colab's TensorFlow (2.18.0) should be compatible (e.g., >=2.16 or specially built).
📍 ml_dtypes version found: 0.4.1 (from /usr/local/lib/python3.11/dist-packages/ml_dtypes/__init__.py)
✅ TensorFlow (Colab's default) configured for v1 compatibility mode.
⚠️ No GPU detected, will use CPU.
🔍 Verifying Google Colab Environment (using Colab defaults for TF/NumPy)...
📍 Python: 3.11.12 (main, Apr  9 2025, 08:55:54) [GCC 11.4.0]
📍 sys.path (first few entries): ['/root/.local/lib/python3.11/site-packages', '/content', '/env/python', '/usr/lib/python311.zip', '/usr/lib/python3.11']
📍 Platform: Linux-6.1.123+-x86_64-with-glibc2.35
📍 Architecture: x86_64
📍 NumPy Version (loaded): 2.0.2
📍 NumPy Path: /usr/local/lib/python3.11/dist-packages/numpy/__init__.py
📍 Tensor

In [4]:
# ========================================================================
# CELL 2: Dataset Configuration and Module Import
# ========================================================================
# Make the globally configured TensorFlow available as tf
if not tensorflow_to_use:
    # Normally unreachable: Cell 1 raises when TensorFlow could not be loaded.
    # NOTE(review): if Cell 1 never got as far as assigning tensorflow_to_use,
    # the check above raises NameError instead of reaching this fallback.
    import tensorflow as tf
    print("⚠️ Warning: tensorflow_to_use was not set from Cell 1. Attempted direct import of tensorflow as tf.")
else:
    # Reuse the module object that Cell 1 already put into v1 compatibility mode.
    tf = tensorflow_to_use

# Core imports
import os
import numpy as np
import random
import pickle
import scipy.sparse as sp
import matplotlib.pyplot as plt
from ast import arg
from random import randint
import time
from datetime import datetime

# Import our modules
from Params import args
import Utils.TimeLogger as logger
from Utils.TimeLogger import log
from DataHandler import DataHandler

# Import the HardGNN model
print("\n🔧 Importing HardGNN model...")
try:
    from HardGNN_model import Recommender
except ImportError as e:
    print(f"❌ Failed to import HardGNN model: {e}")
    print("Please ensure all dependencies are properly installed.")
    # Re-raise instead of sys.exit(1): in a notebook SystemExit hides the
    # import traceback, and `sys` is not imported in this cell.
    raise
else:
    print("✅ Successfully imported HardGNN model")

def configure_dataset(dataset_name):
    """Apply the validated preset for ``dataset_name`` to the shared ``args`` object.

    ``args`` lives for the whole kernel session, so a preset applied earlier
    would otherwise leak into the next one (e.g. amazon's ``keepRate`` and
    ``pos_length`` surviving a switch to gowalla). To keep every call
    equivalent to a call on a fresh kernel, the values that the preset-managed
    options had on the first call are remembered on ``args`` itself (attribute
    ``_preset_baseline``, which survives re-running the defining cell) and are
    restored before a preset is applied. Options that did not exist on the
    first call are removed again.

    Args:
        dataset_name: Dataset identifier, matched case-insensitively against
            'yelp', 'amazon', 'gowalla' and 'movielens'. Any other name prints
            a warning and leaves the preset-managed options at their baseline.

    Returns:
        The same global ``args`` object, updated in place.
    """
    dataset_key = dataset_name.lower()

    # Dataset-specific validated configurations
    presets = {
        # From yelp.sh - validated configuration
        'yelp': {
            'lr': 1e-3, 'reg': 1e-2, 'temp': 0.1, 'ssl_reg': 1e-7,
            'epoch': 150, 'batch': 512, 'sslNum': 40, 'graphNum': 12,
            'gnn_layer': 3, 'att_layer': 2, 'testSize': 1000,
            'ssldim': 32, 'sampNum': 40,
        },
        # From amazon.sh - validated configuration
        'amazon': {
            'lr': 1e-3, 'reg': 1e-2, 'temp': 0.1, 'ssl_reg': 1e-6,
            'epoch': 150, 'batch': 512, 'sslNum': 80, 'graphNum': 5,
            'pred_num': 0, 'gnn_layer': 3, 'att_layer': 4, 'testSize': 1000,
            'keepRate': 0.5, 'sampNum': 40, 'pos_length': 200,
        },
        # From gowalla.sh - validated configuration
        'gowalla': {
            'lr': 2e-3, 'reg': 1e-2, 'temp': 0.1, 'ssl_reg': 1e-6,
            'epoch': 150, 'batch': 512, 'graphNum': 3,
            'gnn_layer': 2, 'att_layer': 1, 'testSize': 1000, 'sampNum': 40,
        },
        # From movielens.sh - validated configuration (does not set temp)
        'movielens': {
            'lr': 1e-3, 'reg': 1e-2, 'ssl_reg': 1e-6,
            'epoch': 150, 'batch': 512, 'sampNum': 40, 'sslNum': 90,
            'graphNum': 6, 'gnn_layer': 2, 'att_layer': 3, 'testSize': 1000,
            'ssldim': 48, 'keepRate': 0.5, 'pos_length': 200, 'leaky': 0.5,
        },
    }

    # Undo whatever an earlier call changed, for every option any preset manages.
    managed_options = sorted({option for preset in presets.values() for option in preset})
    baseline = getattr(args, '_preset_baseline', None)
    if baseline is None:
        # First call in this kernel: remember the untouched values.
        args._preset_baseline = {
            option: getattr(args, option)
            for option in managed_options
            if hasattr(args, option)
        }
    else:
        for option in managed_options:
            if option in baseline:
                setattr(args, option, baseline[option])
            elif hasattr(args, option):
                delattr(args, option)

    # Set base dataset
    args.data = dataset_key

    preset = presets.get(dataset_key)
    if preset is None:
        print(f"⚠️  Unknown dataset: {dataset_name}")
        print("Available datasets: yelp, amazon, gowalla, movielens")
        print("Using default parameters...")
    else:
        for option, value in preset.items():
            setattr(args, option, value)

    # Add hard negative sampling parameters (consistent across all datasets)
    args.use_hard_neg = True
    args.hard_neg_top_k = 5      # K = 5 hard negatives
    args.contrastive_weight = 0.1 # λ = 0.1 contrastive weight
    # Note: τ (temperature) is args.temp. The yelp, amazon and gowalla presets
    # set it to 0.1; movielens and unknown datasets keep whatever args.temp
    # already held (presumably the Params default -- verify it is the intended τ).

    # Full experiment settings (demo mode removed): args.epoch comes from the
    # preset above; args.trnNum is not set here, so whatever value args already
    # carries is used.

    args.tstEpoch = 3  # Test every 3 epochs (can be adjusted if needed for full runs)

    # Set save path
    args.save_path = f'hardgnn_{dataset_key}_colab'

    return args

# Apply the preset selected by DATASET to the shared args object.
configure_dataset(DATASET)

print("✅ HardGNN modules imported and configured successfully")

# Echo the effective settings so the run is self-documenting.
print(f"📊 Configuration for {DATASET.upper()} Dataset:")
for _label, _value in (
    ("Dataset", args.data),
    ("Learning Rate", args.lr),
    ("Regularization", args.reg),
    ("Temperature (τ)", args.temp),
    ("SSL Regularization", args.ssl_reg),
    ("Batch Size", args.batch),
    ("Graph Number", args.graphNum),
    ("GNN Layers", args.gnn_layer),
    ("Attention Layers", args.att_layer),
):
    print(f"  {_label}: {_value}")

print("🔥 Hard Negative Sampling Configuration:")
for _label, _value in (
    ("Enabled", args.use_hard_neg),
    ("Hard Negatives (K)", args.hard_neg_top_k),
    ("Contrastive Weight (λ)", args.contrastive_weight),
):
    print(f"  {_label}: {_value}")


🔧 Importing HardGNN model...
✅ Using TensorFlow 2.x compatible HardGNN model (PRODUCTION)
TensorFlow version: 2.18.0
✅ Successfully imported HardGNN model
✅ HardGNN modules imported and configured successfully
📊 Configuration for GOWALLA Dataset:
  Dataset: gowalla
  Learning Rate: 0.002
  Regularization: 0.01
  Temperature (τ): 0.1
  SSL Regularization: 1e-06
  Batch Size: 512
  Graph Number: 3
  GNN Layers: 2
  Attention Layers: 1
🔥 Hard Negative Sampling Configuration:
  Enabled: True
  Hard Negatives (K): 5
  Contrastive Weight (λ): 0.1


In [5]:
# ========================================================================
# CELL 3: Load Dataset
# ========================================================================

# Turn on the logger's saveDefault switch before the first log() call.
# NOTE(review): presumably this makes log() keep its messages -- confirm in Utils/TimeLogger.
logger.saveDefault = True
log(f'🔄 Starting {DATASET} data loading...')

# The handler is kept in a notebook-level name: the validation and training
# cells pass it to Recommender and read its matrices.
handler = DataHandler()
handler.LoadData()
log(f'✅ {DATASET} data loaded successfully')

# Headline numbers; args.user / args.item are read from args only after LoadData() has run.
print(f"📈 {DATASET.upper()} Dataset Statistics:")
for _label, _count in (
    ("Users", args.user),
    ("Items", args.item),
    ("Training interactions", handler.trnMat.nnz),
    ("Test users", len(handler.tstUsrs)),
):
    print(f"  {_label}: {_count:,}")
print(f"  Time-based graphs: {len(handler.subMat)}")

2025-05-29 05:29:59.599535: 🔄 Starting gowalla data loading...
tstInt [None None None ... None None None]
tstStat [False False False ... False False False] 48653
tstUsrs [    7     8    21 ... 48625 48627 48636] 10000
trnMat <Compressed Sparse Row sparse matrix of dtype 'float64'
	with 1141825 stored elements and shape (48653, 52621)>
  Coords	Values
  (0, 1)	28.0
  (0, 2)	2.0
  (0, 3)	1.0
  (0, 4)	1.0
  (0, 5)	1.0
  (0, 6)	14.0
  (0, 7)	2.0
  (0, 8)	2.0
  (0, 9)	1.0
  (0, 10)	1.0
  (0, 11)	1.0
  (0, 12)	1.0
  (0, 13)	1.0
  (0, 14)	1.0
  (0, 15)	2.0
  (0, 16)	1.0
  (0, 17)	1.0
  (0, 18)	1.0
  (0, 19)	1.0
  (0, 20)	1.0
  (0, 21)	1.0
  (0, 22)	1.0
  (0, 23)	1.0
  (0, 24)	1.0
  (0, 25)	1.0
  :	:
  (48649, 46304)	1.0
  (48650, 20437)	1.0
  (48650, 26212)	1.0
  (48650, 31914)	1.0
  (48650, 39530)	1.0
  (48650, 39533)	1.0
  (48650, 41640)	1.0
  (48650, 41641)	1.0
  (48650, 41642)	2.0
  (48650, 41643)	1.0
  (48650, 41644)	1.0
  (48650, 41645)	1.0
  (48650, 41646)	1.0
  (48650, 41647)	1.0
  (4

In [6]:
# ========================================================================
# CELL 4: Validate Contrastive Loss Component
# ========================================================================
# Builds the model with freshly initialised weights, runs one forward pass on
# 32 test users and reports the contrastive / supervised losses. This is a
# smoke test only; any failure in the sampled forward pass is reported and
# does not stop the notebook.

print(f"🔍 Validating Hard Negative Sampling on {DATASET}...")
print(f"📊 Testing with τ={args.temp}, K={args.hard_neg_top_k}, λ={args.contrastive_weight}")

# Start from an empty graph and an empty NNLayers_tf2 parameter registry so the
# cell can be re-run: without this a second run would build the model on top of
# the previous one. The training cell performs the same reset.
from Utils import NNLayers_tf2
tf.compat.v1.reset_default_graph()
NNLayers_tf2.reset_nn_params()

# Set random seeds for reproducibility. This must come after the graph reset:
# the TensorFlow seed is stored on the current default graph.
np.random.seed(42)
random.seed(42)
tf.compat.v1.set_random_seed(42)

# Initialize TensorFlow session with GPU config (also reused by the training cell)
config = tf.compat.v1.ConfigProto()
config.gpu_options.allow_growth = True
config.allow_soft_placement = True

with tf.compat.v1.Session(config=config) as sess:
    # Initialize HardGNN model
    model = Recommender(sess, handler)
    model.prepareModel()

    # Initialize variables
    init = tf.compat.v1.global_variables_initializer()
    sess.run(init)
    log('✅ Model initialized (random weights)')

    # Test contrastive loss on a small batch
    test_users = handler.tstUsrs[:32]  # Small batch for validation

    try:
        # Sample batch with hard negatives
        uLocs, iLocs, sequence, mask, uLocs_seq = model.sampleTrainBatch(
            test_users, handler.trnMat, handler.timeMat, train_sample_num=10
        )

        # Sample SSL batch
        suLocs, siLocs, suLocs_seq = model.sampleSslBatch(test_users, handler.subMat, False)

        # Prepare feed dict (is_train=False with keepRate=1.0)
        feed_dict = {
            model.uids: uLocs,
            model.iids: iLocs,
            model.sequence: sequence,
            model.mask: mask,
            model.is_train: False,
            model.uLocs_seq: uLocs_seq,
            model.keepRate: 1.0
        }

        # One set of SSL placeholders per time-based graph
        for k in range(args.graphNum):
            feed_dict[model.suids[k]] = suLocs[k]
            feed_dict[model.siids[k]] = siLocs[k]
            feed_dict[model.suLocs_seq[k]] = suLocs_seq[k]

        # Run forward pass
        if hasattr(model, 'contrastive_loss'):
            results = sess.run([
                model.contrastive_loss,
                model.preLoss,
                model.posPred,
                model.negPred
            ], feed_dict=feed_dict)

            contrastive_loss, pre_loss, pos_pred, neg_pred = results

            print("\n" + "="*60)
            print(f"🎯 HARD NEGATIVE SAMPLING VALIDATION - {DATASET.upper()}")
            print("="*60)
            print("📊 Metrics:")
            print(f"  Contrastive Loss: {contrastive_loss:.6f}")
            print(f"  Supervised Loss: {pre_loss:.6f}")
            print(f"  Positive Predictions: {np.mean(pos_pred):.4f} ± {np.std(pos_pred):.4f}")
            print(f"  Negative Predictions: {np.mean(neg_pred):.4f} ± {np.std(neg_pred):.4f}")
            print(f"  Prediction Gap: {np.mean(pos_pred) - np.mean(neg_pred):.4f}")

            if np.mean(pos_pred) > np.mean(neg_pred):
                print("  ✅ Positive predictions > Negative predictions")
            else:
                print("  ⚠️  Negative predictions >= Positive predictions")

            # A usable loss is positive and finite; isfinite rejects both NaN
            # and inf (an infinite loss used to pass the NaN-only check).
            if contrastive_loss > 0 and np.isfinite(contrastive_loss):
                print("  ✅ Hard negative sampling working correctly")
            else:
                print("  ⚠️  Issue with hard negative sampling")

            print(f"\n✅ Validation Complete - Ready for {DATASET.upper()} Training!")
            print("="*60)

        else:
            print("❌ Hard negative sampling not available")

    except Exception as e:
        # Deliberately best effort: report the problem and let the notebook continue.
        print(f"⚠️  Validation error: {e}")
        print("Proceeding with training...")

🔍 Validating Hard Negative Sampling on gowalla...
📊 Testing with τ=0.1, K=5, λ=0.1
USER 48653 ITEM 52621


  data = coomat.data.astype(np.int32)


2025-05-29 05:30:36.182572: ✅ Model initialized (random weights)

🎯 HARD NEGATIVE SAMPLING VALIDATION - GOWALLA
📊 Metrics:
  Contrastive Loss: 5.054583
  Supervised Loss: 3.908865
  Positive Predictions: 16.3672 ± 9.8766
  Negative Predictions: 12.9260 ± 7.9002
  Prediction Gap: 3.4413
  ✅ Positive predictions > Negative predictions
  ✅ Hard negative sampling working correctly

✅ Validation Complete - Ready for GOWALLA Training!


In [None]:
# ========================================================================
# CELL 5: Train HardGNN Model
# ========================================================================

print(f"🚀 Starting HardGNN Training on {DATASET.upper()}...")
print(f"📊 Training Configuration:")
print(f"  Dataset: {args.data}")
print(f"  Epochs: {args.epoch}")
print(f"  Test Frequency: Every {args.tstEpoch} epochs")
print(f"  Training Instances: {args.trnNum}")
print(f"  Batch Size: {args.batch}")
print(f"  Learning Rate: {args.lr}")
print(f"  Regularization: {args.reg}")

# Start fresh session for training
tf.compat.v1.reset_default_graph()

# Re-apply the seeds. The TensorFlow seed set in the validation cell belonged to
# the graph that was just discarded, so the new graph would otherwise be
# unseeded. NumPy / random are re-seeded as well so that training does not
# depend on whether (or how often) the validation cell was run first.
np.random.seed(42)
random.seed(42)
tf.compat.v1.set_random_seed(42)

# Also reset NNLayers_tf2 global parameter tracking
from Utils import NNLayers_tf2 # Import the module
print(f"[DEBUG CELL 5] NNLayers_tf2.params before reset: {list(NNLayers_tf2.params.keys()) if NNLayers_tf2.params else 'Empty or None'}")
NNLayers_tf2.reset_nn_params() # Call the reset function via the module
print(f"[DEBUG CELL 5] NNLayers_tf2.params after reset: {list(NNLayers_tf2.params.keys()) if NNLayers_tf2.params else 'Empty or None'}")


with tf.compat.v1.Session(config=config) as sess:
    # Initialize model
    model = Recommender(sess, handler)
    print(f"[DEBUG CELL 5] NNLayers_tf2.params just before model.prepareModel(): {list(NNLayers_tf2.params.keys()) if NNLayers_tf2.params else 'Empty or None'}")
    model.prepareModel()
    log('✅ Model prepared for training')

    # Initialize variables
    init = tf.compat.v1.global_variables_initializer()
    sess.run(init)
    log('✅ Variables initialized')

    # Training loop
    max_ndcg = 0.0
    max_res = dict()
    max_epoch = 0

    print("\n" + "="*80)
    print(f"🎯 TRAINING HARDGNN ON {DATASET.upper()} WITH HARD NEGATIVE SAMPLING")
    print("="*80)

    for ep in range(args.epoch):
        # Training step
        test = (ep % args.tstEpoch == 0)

        print(f"\n📚 Epoch {ep+1}/{args.epoch}")
        print("-" * 40)

        # Train for one epoch
        train_results = model.trainEpoch()

        # Print training results
        train_log = f"🏋️  Train: Loss={train_results['Loss']:.4f}, PreLoss={train_results['preLoss']:.4f}"
        if 'contrastiveLoss' in train_results:
            train_log += f", ContrastiveLoss={train_results['contrastiveLoss']:.4f}"
        print(train_log)

        # Test if it's a test epoch
        if test:
            test_results = model.testEpoch()
            test_log = f"🎯 Test: HR={test_results['HR']:.4f}, NDCG={test_results['NDCG']:.4f}"
            print(test_log)

            # Track best results
            if test_results['NDCG'] > max_ndcg:
                max_ndcg = test_results['NDCG']
                max_res = test_results.copy()
                max_epoch = ep
                print(f"🌟 New best NDCG: {max_ndcg:.4f}")

    # Final test
    print("\n" + "="*80)
    print("📊 FINAL RESULTS")
    print("="*80)

    final_results = model.testEpoch()
    print(f"🎯 Final Test Results:")
    print(f"  HR@10: {final_results['HR']:.4f}")
    print(f"  NDCG@10: {final_results['NDCG']:.4f}")

    print(f"\n🏆 Best Results (Epoch {max_epoch}):")
    print(f"  Best HR@10: {max_res.get('HR', 0):.4f}")
    print(f"  Best NDCG@10: {max_res.get('NDCG', 0):.4f}")

    print(f"\n✅ HardGNN training on {DATASET.upper()} completed successfully!")
    print("="*80)

🚀 Starting HardGNN Training on GOWALLA...
📊 Training Configuration:
  Dataset: gowalla
  Epochs: 150
  Test Frequency: Every 3 epochs
  Training Instances: 10000
  Batch Size: 512
  Learning Rate: 0.002
  Regularization: 0.01
[DEBUG CELL 5] NNLayers_tf2.params before reset: ['uEmbed', 'iEmbed', 'posEmbed', 'timeEmbed', 'defaultParamName1', 'defaultParamName2', 'defaultParamName3', 'defaultParamName4', 'defaultParamName5', 'defaultParamName6', 'defaultParamName7', 'defaultParamName8', 'defaultParamName9', 'defaultParamName10', 'defaultParamName11', 'defaultParamName12', 'meta2', 'meta2Bias', 'meta3', 'meta3Bias']
[DEBUG CELL 5] NNLayers_tf2.params after reset: Empty or None
USER 48653 ITEM 52621
[DEBUG CELL 5] NNLayers_tf2.params just before model.prepareModel(): Empty or None
2025-05-29 05:32:27.838482: ✅ Model prepared for training
2025-05-29 05:32:28.953265: ✅ Variables initialized

🎯 TRAINING HARDGNN ON GOWALLA WITH HARD NEGATIVE SAMPLING

📚 Epoch 1/150
-----------------------------

In [None]:
# ========================================================================
# CELL 6: Optional - Compare with Baseline SelfGNN
# ========================================================================
#
# Trains the same Recommender with hard negative sampling switched off and
# compares its best HR/NDCG against `max_res` produced by the HardGNN training
# cell.
#
# Names read from earlier cells: DATASET, args, config, handler, Recommender,
# log, tf, max_res.
#
# NOTE(review): the baseline runs for at most 15 epochs while `max_res` may
# come from a much longer run, so the percentages below are indicative only.

from Utils import NNLayers_tf2  # imported here so this cell does not rely on Cell 5's import


def _relative_improvement_pct(candidate, baseline):
    """Return the percentage change of `candidate` relative to `baseline`.

    Returns None when `baseline` is 0 (or missing and defaulted to 0), because
    a relative change against zero is undefined.
    """
    if not baseline:
        return None
    return (candidate - baseline) / baseline * 100


def _format_pct(value):
    """Render a percentage from `_relative_improvement_pct`, or 'n/a' for None."""
    return "n/a (baseline metric is 0)" if value is None else f"{value:+.2f}%"


# To compare with baseline, run this cell to train without hard negatives
print(f"🔬 Training Baseline SelfGNN on {DATASET.upper()} (without hard negatives) for comparison...")

# Disable hard negative sampling for the duration of this cell only; the
# previous value is restored in the `finally` below so that re-running the
# HardGNN training cell afterwards does not silently train the baseline.
previous_use_hard_neg = getattr(args, 'use_hard_neg', False)
args.use_hard_neg = False
print(f"📊 Baseline Configuration: Hard Negative Sampling = {args.use_hard_neg}")

try:
    # Reset graph and train baseline
    tf.compat.v1.reset_default_graph()
    # Cell 5 resets the NNLayers_tf2 parameter registry after resetting the
    # graph; do the same here so the baseline model is built from scratch.
    NNLayers_tf2.reset_nn_params()

    with tf.compat.v1.Session(config=config) as sess:
        # Initialize baseline model
        baseline_model = Recommender(sess, handler)
        baseline_model.prepareModel()

        # Initialize variables
        init = tf.compat.v1.global_variables_initializer()
        sess.run(init)
        log('✅ Baseline model initialized')

        print("\n" + "="*60)
        print(f"📊 BASELINE SELFGNN TRAINING ON {DATASET.upper()}")
        print("="*60)

        baseline_max_ndcg = 0.0
        baseline_max_res = dict()

        # Shorter training for comparison
        for ep in range(min(15, args.epoch)):
            test = (ep % args.tstEpoch == 0)

            # Train
            train_results = baseline_model.trainEpoch()
            train_log = f"Epoch {ep+1}: Loss={train_results['Loss']:.4f}, PreLoss={train_results['preLoss']:.4f}"
            print(train_log)

            # Test
            if test:
                test_results = baseline_model.testEpoch()
                test_log = f"  Test: HR={test_results['HR']:.4f}, NDCG={test_results['NDCG']:.4f}"
                print(test_log)

                if test_results['NDCG'] > baseline_max_ndcg:
                    baseline_max_ndcg = test_results['NDCG']
                    baseline_max_res = test_results.copy()

        print(f"\n📊 Baseline Best Results:")
        print(f"  HR@10: {baseline_max_res.get('HR', 0):.4f}")
        print(f"  NDCG@10: {baseline_max_res.get('NDCG', 0):.4f}")

        print(f"\n🔍 Comparison Summary for {DATASET.upper()}:")
        # A missing or zero baseline metric yields None instead of dividing by
        # zero (or by a made-up denominator of 1).
        improvement_hr = _relative_improvement_pct(
            max_res.get('HR', 0), baseline_max_res.get('HR', 0))
        improvement_ndcg = _relative_improvement_pct(
            max_res.get('NDCG', 0), baseline_max_res.get('NDCG', 0))

        print(f"  HardGNN vs Baseline HR@10: {_format_pct(improvement_hr)}")
        print(f"  HardGNN vs Baseline NDCG@10: {_format_pct(improvement_ndcg)}")

        if improvement_ndcg is not None and improvement_ndcg > 0:
            print("  ✅ HardGNN shows improvement over baseline!")
        else:
            print("  📝 Note: Longer training may be needed to see improvements")

        print("="*60)
finally:
    # Undo the temporary configuration change even if training fails.
    args.use_hard_neg = previous_use_hard_neg

In [None]:
# ========================================================================
# CELL 7: Results Analysis and Summary
# ========================================================================

# Compose the closing report first, then emit it in a single print call.
# The text interpolates DATASET and the hyperparameters held on `args`, both
# of which come from earlier cells.
results_summary = f"""
# 📈 Results Analysis - {DATASET.upper()} Dataset

## Key Metrics to Monitor:

1. **Contrastive Loss**: Should decrease over epochs, indicating better separation
2. **HR@10**: Hit Ratio at 10 - higher is better
3. **NDCG@10**: Normalized Discounted Cumulative Gain - higher is better
4. **Prediction Gap**: Positive predictions should exceed negative predictions

## HardGNN vs Baseline:
- **Hard Negative Sampling** selects more challenging negatives using cosine similarity
- **InfoNCE Loss** creates better decision boundaries with temperature scaling
- **Integrated Training** balances supervised and contrastive objectives

## 🎉 Summary

You've successfully run **HardGNN** on the {DATASET.upper()} dataset!

### What we accomplished:
✅ **Used Validated Configuration**: Original proven hyperparameters for {DATASET}
✅ **Hard Negative Sampling**: Cosine similarity-based selection of challenging negatives
✅ **InfoNCE Contrastive Loss**: Temperature-scaled contrastive learning (τ=0.1)
✅ **Integrated Training**: Balanced supervised + contrastive objectives (λ=0.1)
✅ **GPU Acceleration**: Optimized for Colab Pro+ GPUs
✅ **Dataset-Agnostic**: Works with any supported dataset

### Key Takeaways:
- **Validated Parameters**: Used proven configurations from original experiments
- **Hard Negative Enhancement**: Added challenging negative sampling to improve learning
- **Contrastive Learning**: InfoNCE loss helps create better decision boundaries
- **Minimal Changes**: Only added hard negative sampling, kept everything else identical

### Configuration Used for {DATASET.upper()}:
- **Learning Rate**: {args.lr}
- **Regularization**: {args.reg}
- **Graph Number**: {args.graphNum}
- **GNN Layers**: {args.gnn_layer}
- **Attention Layers**: {args.att_layer}
- **Temperature (τ)**: {args.temp}
- **Hard Negatives (K)**: {args.hard_neg_top_k}
- **Contrastive Weight (λ)**: {args.contrastive_weight}

### Next Steps:
- Try longer training (up to 150 epochs) for better convergence
- Experiment with different K values (3, 5, 10) for hard negatives
- Test different contrastive weights λ (0.05, 0.1, 0.2)
- Compare with other datasets by changing DATASET parameter
- Analyze attention patterns and embedding quality

### To Run on Different Datasets:
Change the DATASET parameter in Cell 1:
```python
DATASET = 'yelp'      # or 'amazon', 'gowalla', 'movielens'
```

**Citation**: This implementation extends the SelfGNN framework with hard negative sampling as described in Liu et al. (2024).
"""

print(results_summary)

# ========================================================================
# END OF SCRIPT
# ========================================================================