In [None]:

import sys
import os
import pandas as pd
import numpy as np
import tensorflow as tf
import yaml
import time
from collections import defaultdict
import gc  # For garbage collection


!pip install recommenders psutil


print(f"TensorFlow version: {tf.__version__}")


from recommenders.models.deeprec.models.graphrec.lightgcn import LightGCN
original_convert_sp_mat = LightGCN._convert_sp_mat_to_sp_tensor

def patched_convert_sp_mat(self, X):
    """Convert a SciPy sparse matrix into a ``tf.SparseTensor``.

    Replacement for ``LightGCN._convert_sp_mat_to_sp_tensor`` that avoids
    ``np.mat`` so the conversion keeps working on NumPy 2.0.

    Args:
        X: Sparse matrix exposing ``tocoo()``.

    Returns:
        tf.SparseTensor holding the same non-zero entries as float32 values.
    """
    sparse_coo = X.tocoo().astype(np.float32)
    # One (row, col) pair per stored value.
    coordinates = np.stack((sparse_coo.row, sparse_coo.col), axis=1)
    return tf.SparseTensor(coordinates, sparse_coo.data, sparse_coo.shape)


LightGCN._convert_sp_mat_to_sp_tensor = patched_convert_sp_mat


tf.get_logger().setLevel('ERROR')


from recommenders.utils.timer import Timer
from recommenders.models.deeprec.DataModel.ImplicitCF import ImplicitCF
from recommenders.datasets import movielens
from recommenders.datasets.python_splitters import python_stratified_split
from recommenders.evaluation.python_evaluation import map, ndcg_at_k, precision_at_k, recall_at_k
from recommenders.utils.constants import SEED as DEFAULT_SEED
from recommenders.models.deeprec.deeprec_utils import prepare_hparams
from recommenders.utils.notebook_utils import store_metadata
import psutil



TensorFlow version: 2.18.0


In [None]:

# Mount Google Drive at /content/drive
from google.colab import drive

drive.mount('/content/drive')

# Folder on Drive that receives the evaluation CSV files
RESULTS_DIR = '/content/drive/MyDrive/lightgcn_results'

# Make sure the results folder and the local config / checkpoint folders exist
for _required_dir in (
    RESULTS_DIR,
    'recommenders/models/deeprec/config',
    'model_checkpoints',
):
    os.makedirs(_required_dir, exist_ok=True)



Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:

# --- Experiment settings ---
MOVIELENS_DATA_SIZE = '100k'  # dataset size passed to the MovieLens loader
TOP_K = 10                    # cutoff used for recommendations and ranking metrics
SEED = DEFAULT_SEED           # seed constant imported from recommenders

# Training runs to compare: every combination of 50/100 epochs and
# batch size 1024/256, each with a unique name used for file naming.
CONFIGS = [
    dict(epochs=50, batch_size=1024, name='config1'),
    dict(epochs=50, batch_size=256, name='config2'),
    dict(epochs=100, batch_size=1024, name='config3'),
    dict(epochs=100, batch_size=256, name='config4'),
]

def create_yaml_config(config_name, epochs, batch_size):
    """Write a LightGCN YAML configuration file and return its path.

    The file has three sections (``model``, ``train`` and ``evaluate``) and is
    saved as ``recommenders/models/deeprec/config/lightgcn_<config_name>.yaml``.
    The target directory is created if it does not exist yet.

    Args:
        config_name: Suffix used in the generated file name.
        epochs: Number of epochs written to the ``model`` and ``train`` sections.
        batch_size: Batch size written to the ``model`` and ``train`` sections.

    Returns:
        str: Relative path of the YAML file that was written.
    """
    # Local import so the function does not rely on another cell importing os.
    import os

    config_dir = 'recommenders/models/deeprec/config'
    learning_rate = 0.005  # single source for both sections below

    yaml_config = {
        'model': {
            'name': 'lightgcn',
            'model_type': 'graphrec',
            'user_embedding_size': 64,
            'item_embedding_size': 64,
            'embed_size': 64,
            'n_layers': 3,
            'batch_size': batch_size,
            'show_step': 1,
            'learning_rate': learning_rate,
            'epochs': epochs,
            'eval_epoch': 5,
            'top_k': TOP_K
        },
        'train': {
            'batch_size': batch_size,
            'epochs': epochs,
            'learning_rate': learning_rate
        },
        'evaluate': {
            'metrics': ['map', 'ndcg', 'precision', 'recall'],
            'cutoffs': [10, 20],
            # Follows TOP_K so this section cannot drift from model.top_k.
            'k': TOP_K
        }
    }

    # Without this, open() raises FileNotFoundError when the directory
    # has not been created beforehand.
    os.makedirs(config_dir, exist_ok=True)

    yaml_file_path = f'{config_dir}/lightgcn_{config_name}.yaml'
    with open(yaml_file_path, 'w') as f:
        yaml.dump(yaml_config, f)

    return yaml_file_path

def calculate_diversity(topk_items, item_popularity):
    """Summarise the recommended items by popularity and catalogue coverage.

    Args:
        topk_items: DataFrame of recommendations with an ``itemID`` column.
        item_popularity: Mapping from item id to its interaction count.
            Items missing from the mapping count as popularity 0.

    Returns:
        dict with
            ``avg_popularity``: mean popularity over the distinct recommended
                items (0.0 when there are no recommendations), and
            ``unique_items``: number of distinct recommended items.
    """
    recommended_items = topk_items['itemID'].unique()
    unique_items = len(recommended_items)

    if unique_items == 0:
        # np.mean([]) would return NaN and emit a RuntimeWarning; report 0
        # instead, matching how calculate_popularity_bias treats empty input.
        avg_popularity = 0.0
    else:
        avg_popularity = np.mean(
            [item_popularity.get(item, 0) for item in recommended_items]
        )

    return {
        'avg_popularity': avg_popularity,
        'unique_items': unique_items
    }

def calculate_popularity_bias(topk_items, item_popularity, top_p=0.2):
    """Measure how much of the recommended catalogue is made of popular items.

    Args:
        topk_items: DataFrame of recommendations with an ``itemID`` column.
        item_popularity: Mapping from item id to its interaction count.
        top_p: Fraction of the most popular items that counts as "popular".

    Returns:
        dict with
            ``popular_ratio``: share of distinct recommended items that belong
                to the top ``top_p`` most popular items (0 if nothing was
                recommended), and
            ``top_p_percent``: the ``top_p`` value that was used.
    """
    # Rank (item, count) pairs from most to least popular and keep the head.
    by_popularity = sorted(
        item_popularity.items(), key=lambda pair: pair[1], reverse=True
    )
    head_size = int(len(by_popularity) * top_p)
    head_items = {item_id for item_id, _count in by_popularity[:head_size]}

    # Distinct recommended items, and how many of them fall in the head.
    recommended = topk_items['itemID'].unique()
    if len(recommended) > 0:
        hits = sum(1 for item_id in recommended if item_id in head_items)
        popular_ratio = hits / len(recommended)
    else:
        popular_ratio = 0

    return {
        'popular_ratio': popular_ratio,
        'top_p_percent': top_p
    }

def reset_tf_graph():
    """Reset the default TensorFlow graph under either TF 1.x or TF 2.x.

    Uses ``tf.reset_default_graph`` when the installed TensorFlow exposes it
    at the top level, otherwise the ``tf.compat.v1`` equivalent.
    """
    reset_fn = getattr(tf, 'reset_default_graph', None)
    if reset_fn is None:
        # Top-level function is absent: use the compat.v1 entry point.
        reset_fn = tf.compat.v1.reset_default_graph
    reset_fn()



In [None]:
def main():
    """Train and evaluate LightGCN on MovieLens for every entry in ``CONFIGS``.

    For each configuration the function writes a YAML config, trains a model,
    and records ranking metrics (MAP, NDCG, precision and recall at ``TOP_K``),
    training and prediction time, the change in process RSS memory, and
    popularity statistics of the recommendations. Results are written to an
    interim CSV after every successful configuration and to a final CSV in
    ``RESULTS_DIR`` after all configurations have been attempted.

    A configuration that raises is reported and skipped; the remaining
    configurations still run. The model reference is dropped and the
    TensorFlow graph is reset after every configuration, whether it succeeded
    or failed.
    """
    # Load data
    print("Loading MovieLens data...")
    df = movielens.load_pandas_df(size=MOVIELENS_DATA_SIZE)
    print("MovieLens data loaded successfully")

    # Number of interactions per item, computed on the full dataset
    item_popularity = df.groupby('itemID').size().to_dict()

    # Split data into train and test
    train, test = python_stratified_split(df, ratio=0.75)
    print(f"Train set size: {len(train)}, Test set size: {len(test)}")

    # Prepare data model once; it is shared by all configurations
    data = ImplicitCF(train=train, test=test, seed=SEED)

    results = []
    process = psutil.Process(os.getpid())

    for config in CONFIGS:
        model = None
        try:
            print(f"\n{'='*50}")
            print(f"Running configuration: {config['name']}")
            print(f"Epochs: {config['epochs']}, Batch Size: {config['batch_size']}")
            print(f"{'='*50}")

            # Create config file
            yaml_file = create_yaml_config(
                config['name'],
                config['epochs'],
                config['batch_size']
            )

            # Prepare hyperparameters
            hparams = prepare_hparams(
                yaml_file,
                n_layers=3,
                batch_size=config['batch_size'],
                epochs=config['epochs'],
                learning_rate=0.005,
                eval_epoch=5,
                top_k=TOP_K,
                decay=0.0001,
                save_epoch=5,
                metrics=['map', 'ndcg', 'precision', 'recall'],
                MODEL_DIR=f"model_checkpoints/{config['name']}"
            )

            # Resident memory (MB) before the model is built
            memory_before = process.memory_info().rss / 1024 / 1024

            # Initialize model
            model = LightGCN(hparams, data)

            # Train model
            print("Starting model training...")

            with Timer() as train_time:
                model.fit()

            training_time = train_time.interval
            print(f"Training completed in {training_time:.2f} seconds")

            # Resident memory (MB) after training; the difference is reported
            memory_after = process.memory_info().rss / 1024 / 1024
            memory_usage = memory_after - memory_before

            # Prediction time
            with Timer() as pred_time:
                topk_scores = model.recommend_k_items(test, top_k=TOP_K, remove_seen=True)
            prediction_time = pred_time.interval

            # Evaluate model
            eval_map = map(test, topk_scores, k=TOP_K)
            eval_ndcg = ndcg_at_k(test, topk_scores, k=TOP_K)
            eval_precision = precision_at_k(test, topk_scores, k=TOP_K)
            eval_recall = recall_at_k(test, topk_scores, k=TOP_K)

            print("Evaluation Metrics:")
            print(f"MAP@{TOP_K}: {eval_map:.4f}")
            print(f"NDCG@{TOP_K}: {eval_ndcg:.4f}")
            print(f"Precision@{TOP_K}: {eval_precision:.4f}")
            print(f"Recall@{TOP_K}: {eval_recall:.4f}")

            # Calculate diversity metrics
            diversity = calculate_diversity(topk_scores, item_popularity)
            popularity_bias = calculate_popularity_bias(topk_scores, item_popularity)

            # Store results
            results.append({
                'config_name': config['name'],
                'epochs': config['epochs'],
                'batch_size': config['batch_size'],
                'map': eval_map,
                'ndcg': eval_ndcg,
                'precision': eval_precision,
                'recall': eval_recall,
                'training_time': training_time,
                'prediction_time': prediction_time,
                'memory_usage_mb': memory_usage,
                'avg_popularity': diversity['avg_popularity'],
                'unique_items': diversity['unique_items'],
                'popular_ratio': popularity_bias['popular_ratio'],
            })

            # Save interim results in case a later configuration fails
            interim_csv_path = f'{RESULTS_DIR}/lightgcn_evaluation_interim.csv'
            pd.DataFrame(results).to_csv(interim_csv_path, index=False)
            print(f"Interim results saved to '{interim_csv_path}'")

        except Exception as e:
            # Report and move on to the next configuration
            print(f"Error in configuration {config['name']}: {str(e)}")

        finally:
            # Runs on success and on failure, so a failed run does not keep
            # its model alive while the next configuration trains.
            model = None
            try:
                reset_tf_graph()
            except Exception:
                # `except Exception` (not bare `except`) so that
                # KeyboardInterrupt / SystemExit still stop the sweep.
                print("Failed to reset TensorFlow graph, but continuing anyway.")
            gc.collect()

    # Create results dataframe and save to CSV
    if results:
        results_df = pd.DataFrame(results)
        csv_path = f'{RESULTS_DIR}/lightgcn_evaluation_results.csv'
        results_df.to_csv(csv_path, index=False)
        print(f"\nResults saved to '{csv_path}'")

        # Display summary table
        print("\nSummary of Results:")
        display(results_df[['config_name', 'epochs', 'batch_size', 'map', 'ndcg', 'precision', 'recall', 'training_time']])
    else:
        print("No results were collected. All configurations failed.")

if __name__ == "__main__":
    main()

Loading MovieLens data...


100%|██████████| 4.81k/4.81k [00:00<00:00, 12.0kKB/s]


MovieLens data loaded successfully
Train set size: 74992, Test set size: 25008

Running configuration: config1
Epochs: 50, Batch Size: 1024
Already create adjacency matrix.
Already normalize adjacency matrix.
Using xavier initialization.
Starting model training...
Epoch 1 (train)4.2s: train loss = 0.47168 = (mf)0.47143 + (embed)0.00024
Epoch 2 (train)5.3s: train loss = 0.29010 = (mf)0.28947 + (embed)0.00063
Epoch 3 (train)5.3s: train loss = 0.25365 = (mf)0.25286 + (embed)0.00079
Epoch 4 (train)3.2s: train loss = 0.23849 = (mf)0.23752 + (embed)0.00097
Save model to path /content/model_checkpoints/config1/epoch_5
Epoch 5 (train)2.2s + (eval)0.4s: train loss = 0.22635 = (mf)0.22525 + (embed)0.00111, map = 0.21589, ndcg = 0.34528, precision = 0.29873, recall = 0.15604
Epoch 6 (train)3.1s: train loss = 0.21993 = (mf)0.21872 + (embed)0.00122
Epoch 7 (train)2.0s: train loss = 0.21247 = (mf)0.21115 + (embed)0.00133
Epoch 8 (train)2.0s: train loss = 0.20166 = (mf)0.20020 + (embed)0.00146
Epoch 

Unnamed: 0,config_name,epochs,batch_size,map,ndcg,precision,recall,training_time
0,config1,50,1024,0.13961,0.458353,0.399576,0.216312,127.118077
1,config2,50,256,0.129929,0.433855,0.379639,0.208013,361.846689
2,config3,100,1024,0.13036,0.436666,0.385684,0.209662,233.892473
3,config4,100,256,0.115269,0.393208,0.346554,0.192624,718.983322


In [None]:
# Cold-Start Analysis for LightGCN
import pandas as pd
import numpy as np
import tensorflow as tf
import yaml
import time
from collections import defaultdict
import matplotlib.pyplot as plt
import gc

# Install necessary packages for Colab
!pip install recommenders psutil

# Fix NumPy deprecated function issue
from recommenders.models.deeprec.models.graphrec.lightgcn import LightGCN
original_convert_sp_mat = LightGCN._convert_sp_mat_to_sp_tensor

def patched_convert_sp_mat(self, X):
    """Convert a SciPy sparse matrix into a ``tf.SparseTensor``.

    Replacement for ``LightGCN._convert_sp_mat_to_sp_tensor`` that avoids
    ``np.mat`` so the conversion keeps working on NumPy 2.0.

    Args:
        X: Sparse matrix exposing ``tocoo()``.

    Returns:
        tf.SparseTensor holding the same non-zero entries as float32 values.
    """
    sparse_coo = X.tocoo().astype(np.float32)
    # One (row, col) pair per stored value.
    coordinates = np.stack((sparse_coo.row, sparse_coo.col), axis=1)
    return tf.SparseTensor(coordinates, sparse_coo.data, sparse_coo.shape)

# Apply the patch
LightGCN._convert_sp_mat_to_sp_tensor = patched_convert_sp_mat

# Import required modules
from recommenders.utils.timer import Timer
from recommenders.models.deeprec.DataModel.ImplicitCF import ImplicitCF
from recommenders.datasets import movielens
from recommenders.datasets.python_splitters import python_stratified_split
from recommenders.evaluation.python_evaluation import map, ndcg_at_k, precision_at_k, recall_at_k
from recommenders.utils.constants import SEED as DEFAULT_SEED
from recommenders.models.deeprec.deeprec_utils import prepare_hparams
import psutil

# Create Google Drive mount point to save results
import os  # required by the os.makedirs calls below; not among this cell's other imports

from google.colab import drive
drive.mount('/content/drive')

# Define paths for saving results
RESULTS_DIR = '/content/drive/MyDrive/lightgcn_results'
os.makedirs(RESULTS_DIR, exist_ok=True)
os.makedirs('recommenders/models/deeprec/config', exist_ok=True)
os.makedirs('model_checkpoints', exist_ok=True)

# Constants
MOVIELENS_DATA_SIZE = '100k'  # dataset size passed to the MovieLens loader
TOP_K = 10                    # cutoff used for recommendations and ranking metrics
SEED = DEFAULT_SEED           # seed constant imported from recommenders
BATCH_SIZE = 1024             # training batch size for the cold-start model
EPOCHS = 50                   # training epochs for the cold-start model

def reset_tf_graph():
    """Reset the default TensorFlow graph under either TF 1.x or TF 2.x.

    Uses ``tf.reset_default_graph`` when the installed TensorFlow exposes it
    at the top level, otherwise the ``tf.compat.v1`` equivalent.
    """
    reset_fn = getattr(tf, 'reset_default_graph', None)
    if reset_fn is None:
        # Top-level function is absent: use the compat.v1 entry point.
        reset_fn = tf.compat.v1.reset_default_graph
    reset_fn()

def create_yaml_config(config_name, epochs, batch_size):
    """Write a LightGCN YAML configuration file and return its path.

    The file has three sections (``model``, ``train`` and ``evaluate``) and is
    saved as ``recommenders/models/deeprec/config/lightgcn_<config_name>.yaml``.
    The target directory is created if it does not exist yet.

    Args:
        config_name: Suffix used in the generated file name.
        epochs: Number of epochs written to the ``model`` and ``train`` sections.
        batch_size: Batch size written to the ``model`` and ``train`` sections.

    Returns:
        str: Relative path of the YAML file that was written.
    """
    # Local import so the function does not rely on another cell importing os.
    import os

    config_dir = 'recommenders/models/deeprec/config'
    learning_rate = 0.005  # single source for both sections below

    yaml_config = {
        'model': {
            'name': 'lightgcn',
            'model_type': 'graphrec',
            'user_embedding_size': 64,
            'item_embedding_size': 64,
            'embed_size': 64,
            'n_layers': 3,
            'batch_size': batch_size,
            'show_step': 1,
            'learning_rate': learning_rate,
            'epochs': epochs,
            'eval_epoch': 5,
            'top_k': TOP_K
        },
        'train': {
            'batch_size': batch_size,
            'epochs': epochs,
            'learning_rate': learning_rate
        },
        'evaluate': {
            'metrics': ['map', 'ndcg', 'precision', 'recall'],
            'cutoffs': [10, 20],
            # Follows TOP_K so this section cannot drift from model.top_k.
            'k': TOP_K
        }
    }

    # Without this, open() raises FileNotFoundError when the directory
    # has not been created beforehand.
    os.makedirs(config_dir, exist_ok=True)

    yaml_file_path = f'{config_dir}/lightgcn_{config_name}.yaml'
    with open(yaml_file_path, 'w') as f:
        yaml.dump(yaml_config, f)

    return yaml_file_path

def count_interactions(df, column_name):
    """Return the number of rows in ``df`` for each distinct value of ``column_name``.

    The result is a Series indexed by the distinct values of that column
    (for example user ids or item ids).
    """
    grouped = df.groupby(column_name)
    interaction_counts = grouped.size()
    return interaction_counts

def create_interaction_buckets(counts, num_buckets=5):
    """Group entity ids into buckets according to their interaction count.

    Bucket boundaries are 1 plus the integer-truncated quantiles of
    ``counts``, so buckets hold roughly the same number of entities. Each
    bucket covers ``lower <= count < upper``. Boundaries that truncate to the
    same integer are merged, so heavily skewed data can yield fewer than
    ``num_buckets`` buckets instead of empty ones with clashing labels.

    Args:
        counts: pandas Series mapping entity id (index) to interaction count.
        num_buckets: Requested number of buckets.

    Returns:
        dict mapping a label to the list of ids in that bucket. Labels are
        ``"lo-hi"`` (inclusive bounds) or ``"lo+"`` for the open-ended last
        bucket. An empty ``counts`` yields an empty dict.
    """
    buckets = {}

    if len(counts) > 0:
        # Interior quantiles only: 0 and 1 are covered by the fixed ends below.
        quantiles = np.linspace(0, 1, num_buckets + 1)[1:-1]

        # A set removes repeated boundaries. With duplicates, the range
        # ``lower <= count < upper`` would be empty and several buckets
        # would end up with the same label.
        cut_points = {1} | {int(t) for t in np.quantile(counts, quantiles)}
        thresholds = sorted(cut_points) + [float('inf')]

        for lower, upper in zip(thresholds[:-1], thresholds[1:]):
            if upper == float('inf'):
                bucket_name = f"{lower}+"
            else:
                bucket_name = f"{lower}-{upper - 1}"

            buckets[bucket_name] = [
                entity_id for entity_id, count in counts.items()
                if lower <= count < upper
            ]

    print("Interaction buckets:")
    for bucket, ids in buckets.items():
        print(f"  {bucket}: {len(ids)} entities")

    return buckets

def evaluate_cold_start(model, test_df, buckets, entity_type, metrics, k=10):
    """Compute ranking metrics separately for each interaction bucket.

    Args:
        model: Trained recommender exposing ``recommend_k_items``.
        test_df: Test interactions with ``userID`` and ``itemID`` columns.
        buckets: Mapping from bucket label to the list of ids in the bucket.
        entity_type: ``'user'`` filters ``test_df`` by ``userID``; any other
            value filters by ``itemID``.
        metrics: Accepted for interface compatibility; the body does not read
            it and always computes MAP, NDCG, precision and recall.
        k: Cutoff for the recommendations and the metrics.

    Returns:
        dict mapping bucket label to a dict with keys ``count`` (number of
        test rows in the bucket), ``map``, ``ndcg``, ``precision`` and
        ``recall``. Buckets with no test rows are omitted.
    """
    id_column = 'userID' if entity_type == 'user' else 'itemID'
    results = {}

    for bucket, ids in buckets.items():
        print(f"Evaluating {entity_type} bucket: {bucket}")

        # Keep only the test rows whose user/item belongs to this bucket
        bucket_test = test_df[test_df[id_column].isin(ids)]
        if not len(bucket_test):
            print(f"  No test data for {entity_type} bucket {bucket}, skipping")
            continue

        # Recommendations for the filtered rows, then metrics against them
        topk_scores = model.recommend_k_items(bucket_test, top_k=k, remove_seen=True)
        bucket_results = {
            'count': len(bucket_test),
            'map': map(bucket_test, topk_scores, k=k),
            'ndcg': ndcg_at_k(bucket_test, topk_scores, k=k),
            'precision': precision_at_k(bucket_test, topk_scores, k=k),
            'recall': recall_at_k(bucket_test, topk_scores, k=k),
        }

        results[bucket] = bucket_results
        print(f"  MAP: {bucket_results['map']:.4f}, NDCG: {bucket_results['ndcg']:.4f}")

    return results

def main():
    """Train one LightGCN model and report its metrics per interaction bucket.

    Users and items are grouped into buckets by interaction count. A single
    model is trained with ``EPOCHS`` and ``BATCH_SIZE``, and MAP, NDCG,
    precision and recall at ``TOP_K`` are computed on the test rows of each
    bucket. Per-bucket results are saved as two CSV files in ``RESULTS_DIR``
    and printed.

    Errors during training or analysis are reported rather than raised. The
    model reference is dropped and the TensorFlow graph is reset on success
    and on failure.
    """
    # Load data
    print("Loading MovieLens data...")
    df = movielens.load_pandas_df(size=MOVIELENS_DATA_SIZE)
    print("MovieLens data loaded successfully")

    # Count interactions per user and item.
    # These counts use the full dataset (before the train/test split), so
    # bucket membership reflects train + test interactions.
    user_counts = count_interactions(df, 'userID')
    item_counts = count_interactions(df, 'itemID')

    print("User interaction statistics:")
    print(f"  Min: {user_counts.min()}, Max: {user_counts.max()}, Median: {user_counts.median():.1f}, Mean: {user_counts.mean():.1f}")
    print("Item interaction statistics:")
    print(f"  Min: {item_counts.min()}, Max: {item_counts.max()}, Median: {item_counts.median():.1f}, Mean: {item_counts.mean():.1f}")

    # Create buckets based on interaction counts
    user_buckets = create_interaction_buckets(user_counts)
    item_buckets = create_interaction_buckets(item_counts)

    # Split data into train and test
    train, test = python_stratified_split(df, ratio=0.75)
    print(f"Train set size: {len(train)}, Test set size: {len(test)}")

    # Prepare data model
    data = ImplicitCF(train=train, test=test, seed=SEED)

    # Create config file
    yaml_file = create_yaml_config('cold_start', EPOCHS, BATCH_SIZE)

    # Prepare hyperparameters
    hparams = prepare_hparams(
        yaml_file,
        n_layers=3,
        batch_size=BATCH_SIZE,
        epochs=EPOCHS,
        learning_rate=0.005,
        eval_epoch=5,
        top_k=TOP_K,
        decay=0.0001,
        save_epoch=5,
        metrics=['map', 'ndcg', 'precision', 'recall'],
        MODEL_DIR='model_checkpoints/cold_start'
    )

    model = None
    try:
        print("Initializing and training model...")
        model = LightGCN(hparams, data)

        with Timer() as train_time:
            model.fit()

        print(f"Training completed in {train_time.interval:.2f} seconds")

        # Perform cold-start analysis
        print("\nPerforming cold-start analysis...")
        metrics = ['map', 'ndcg', 'precision', 'recall']

        # Cold-start user analysis
        print("\nCold-start USER analysis:")
        user_results = evaluate_cold_start(model, test, user_buckets, 'user', metrics, k=TOP_K)

        # Cold-start item analysis
        print("\nCold-start ITEM analysis:")
        item_results = evaluate_cold_start(model, test, item_buckets, 'item', metrics, k=TOP_K)

        # One row per bucket, one column per metric
        user_df = pd.DataFrame.from_dict(user_results, orient='index')
        item_df = pd.DataFrame.from_dict(item_results, orient='index')

        # Save results
        user_df.to_csv(f'{RESULTS_DIR}/cold_start_user_analysis.csv')
        item_df.to_csv(f'{RESULTS_DIR}/cold_start_item_analysis.csv')

        print("\nCold-start user analysis results:")
        print(user_df)
        print("\nCold-start item analysis results:")
        print(item_df)

        # One line per metric listing its value in every bucket
        print("\nCold-start Performance Summary:")
        summaries = (
            ("User interaction level impact on performance:", user_df),
            ("\nItem interaction level impact on performance:", item_df),
        )
        for heading, frame in summaries:
            print(heading)
            for metric in metrics:
                per_bucket = ' | '.join(
                    f'{bucket}: {row[metric]:.4f}' for bucket, row in frame.iterrows()
                )
                print(f"  {metric.upper()}: {per_bucket}")

    except Exception as e:
        print(f"Error during cold-start analysis: {str(e)}")

    finally:
        # Runs on success and on failure so the model is always released.
        model = None
        try:
            reset_tf_graph()
        except Exception:
            # `except Exception` (not bare `except`) so that
            # KeyboardInterrupt / SystemExit are not swallowed.
            print("Failed to reset TensorFlow graph.")
        gc.collect()

if __name__ == "__main__":
    main()

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Loading MovieLens data...


100%|██████████| 4.81k/4.81k [00:00<00:00, 11.6kKB/s]


MovieLens data loaded successfully
User interaction statistics:
  Min: 20, Max: 737, Median: 65.0, Mean: 106.0
Item interaction statistics:
  Min: 1, Max: 583, Median: 27.0, Mean: 59.5
Interaction buckets:
  1-28: 187 entities
  29-49: 188 entities
  50-92: 187 entities
  93-170: 190 entities
  171+: 191 entities
Interaction buckets:
  1-4: 333 entities
  5-15: 339 entities
  16-42: 332 entities
  43-99: 340 entities
  100+: 338 entities
Train set size: 74992, Test set size: 25008
Initializing and training model...
Already create adjacency matrix.
Already normalize adjacency matrix.
Using xavier initialization.
Epoch 1 (train)2.2s: train loss = 0.47168 = (mf)0.47143 + (embed)0.00024
Epoch 2 (train)2.0s: train loss = 0.29010 = (mf)0.28947 + (embed)0.00063
Epoch 3 (train)2.0s: train loss = 0.25365 = (mf)0.25286 + (embed)0.00079
Epoch 4 (train)2.0s: train loss = 0.23849 = (mf)0.23752 + (embed)0.00097
Save model to path /content/model_checkpoints/cold_start/epoch_5
Epoch 5 (train)3.1s + (e