In [1]:
!pip install cornac'
!pip install recommenders
import os
import sys
import time
import numpy as np
import pandas as pd
import torch
import cornac
import psutil
import matplotlib.pyplot as plt
from recommenders.datasets import movielens
from recommenders.datasets.python_splitters import python_random_split
from recommenders.models.cornac.cornac_utils import predict_ranking
from recommenders.utils.timer import Timer
from recommenders.utils.constants import SEED
from recommenders.evaluation.python_evaluation import (
    map,
    ndcg_at_k,
    precision_at_k,
    recall_at_k,
)

# Print versions for reference
print(f"System version: {sys.version}")
print(f"PyTorch version: {torch.__version__}")
print(f"Cornac version: {cornac.__version__}")

/bin/bash: -c: line 1: unexpected EOF while looking for matching `''
/bin/bash: -c: line 2: syntax error: unexpected end of file
Collecting recommenders
  Downloading recommenders-1.2.1-py3-none-any.whl.metadata (13 kB)
Collecting category-encoders<3,>=2.6.0 (from recommenders)
  Downloading category_encoders-2.8.1-py3-none-any.whl.metadata (7.9 kB)
Collecting cornac<3,>=1.15.2 (from recommenders)
  Downloading cornac-2.3.3-cp311-cp311-manylinux1_x86_64.whl.metadata (51 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m51.4/51.4 kB[0m [31m989.0 kB/s[0m eta [36m0:00:00[0m
Collecting locust<3,>=2.12.2 (from recommenders)
  Downloading locust-2.37.1-py3-none-any.whl.metadata (9.4 kB)
Collecting memory-profiler<1,>=0.61.0 (from recommenders)
  Downloading memory_profiler-0.61.0-py3-none-any.whl.metadata (20 kB)
Collecting retrying<2,>=1.3.4 (from recommenders)
  Downloading retrying-1.3.4-py3-none-any.whl.metadata (6.9 kB)
Collecting scikit-surprise>=1.1.3 (from recom

In [2]:
MOVIELENS_DATA_SIZE = '100k'
TOP_K = 10
LATENT_DIM = 50
ENCODER_DIMS = [100]
ACT_FUNC = "tanh"
LIKELIHOOD = "pois"
LEARNING_RATE = 0.005  # As requested

# Hyperparameters to test
hyperparameters = [
    {"epochs": 50, "batch_size": 256},
    {"epochs": 50, "batch_size": 1024},
    {"epochs": 100, "batch_size": 256},
    {"epochs": 100, "batch_size": 1024},
    {"epochs": 500, "batch_size": 128}
]

In [3]:
def calculate_diversity(topk_items, item_popularity):

    avg_popularity = np.mean([item_popularity.get(item, 0) for item in topk_items['itemID'].unique()])
    unique_items = len(topk_items['itemID'].unique())
    return {
        'avg_popularity': avg_popularity,
        'unique_items': unique_items
    }


def calculate_popularity_bias(topk_items, item_popularity, top_p=0.2):
    """Calculate popularity bias in recommendations."""
    # Sort items by popularity
    sorted_items = sorted(item_popularity.items(), key=lambda x: x[1], reverse=True)
    # Get top p% popular items
    n_top = int(len(sorted_items) * top_p)
    top_popular = set([item for item, _ in sorted_items[:n_top]])

    # Calculate percentage of recommendations from top popular items
    reco_items = topk_items['itemID'].unique()
    top_popular_in_reco = [item for item in reco_items if item in top_popular]
    popular_ratio = len(top_popular_in_reco) / len(reco_items) if len(reco_items) > 0 else 0

    return {
        'popular_ratio': popular_ratio,
        'top_p_percent': top_p
    }


In [10]:
def main():

    try:
        from tqdm import tqdm
    except ImportError:
        !pip install tqdm
        from tqdm import tqdm

    print("Loading MovieLens data...")
    data = movielens.load_pandas_df(
        size=MOVIELENS_DATA_SIZE,
        header=["userID", "itemID", "rating"]
    )
    print(f"Data loaded: {len(data)} ratings")


    train, test = python_random_split(data, 0.75)
    print(f"Train set size: {len(train)}, Test set size: {len(test)}")


    train_set = cornac.data.Dataset.from_uir(train.itertuples(index=False), seed=SEED)
    print(f'Number of users: {train_set.num_users}')
    print(f'Number of items: {train_set.num_items}')


    item_popularity = train.groupby('itemID').size().to_dict()


    results = []

    for params in tqdm(hyperparameters, desc="Testing hyperparameters"):
        print(f"\n{'='*50}")
        print(f"Training with epochs={params['epochs']}, batch_size={params['batch_size']}")
        print(f"{'='*50}")


        bivae = cornac.models.BiVAECF(
            k=LATENT_DIM,
            encoder_structure=ENCODER_DIMS,
            act_fn=ACT_FUNC,
            likelihood=LIKELIHOOD,
            n_epochs=params['epochs'],
            batch_size=params['batch_size'],
            learning_rate=LEARNING_RATE,  # Using 0.005 as requested
            seed=SEED,
            use_gpu=torch.cuda.is_available(),
            verbose=True
        )


        memory_before = psutil.Process(os.getpid()).memory_info().rss / 1024 / 1024 # in MB


        with Timer() as training_timer:
            bivae.fit(train_set)
        training_time = training_timer.interval
        print(f"Training time: {training_time:.2f} seconds")


        memory_after = psutil.Process(os.getpid()).memory_info().rss / 1024 / 1024 # in MB
        memory_used = memory_after - memory_before


        with Timer() as prediction_timer:
            all_predictions = predict_ranking(bivae, train, usercol='userID', itemcol='itemID', remove_seen=True)
        prediction_time = prediction_timer.interval
        print(f"Prediction time: {prediction_time:.2f} seconds")


        eval_map = map(test, all_predictions, col_prediction='prediction', k=TOP_K)
        eval_ndcg = ndcg_at_k(test, all_predictions, col_prediction='prediction', k=TOP_K)
        eval_precision = precision_at_k(test, all_predictions, col_prediction='prediction', k=TOP_K)
        eval_recall = recall_at_k(test, all_predictions, col_prediction='prediction', k=TOP_K)

        print(f"MAP: {eval_map:.6f}")
        print(f"NDCG: {eval_ndcg:.6f}")
        print(f"Precision@{TOP_K}: {eval_precision:.6f}")
        print(f"Recall@{TOP_K}: {eval_recall:.6f}")


        diversity = calculate_diversity(all_predictions, item_popularity)
        popularity_bias = calculate_popularity_bias(all_predictions, item_popularity)

        print(f"Recommendation Diversity:")
        print(f"  Average Popularity: {diversity['avg_popularity']:.6f}")
        print(f"  Unique Items: {diversity['unique_items']}")

        print(f"Popularity Bias:")
        print(f"  Popular Ratio: {popularity_bias['popular_ratio']:.6f} (Top {popularity_bias['top_p_percent']*100}% of items)")


        results.append({
            'epochs': params['epochs'],
            'batch_size': params['batch_size'],
            'training_time': training_time,
            'prediction_time': prediction_time,
            'map': eval_map,
            'ndcg': eval_ndcg,
            'precision': eval_precision,
            'recall': eval_recall,
            'avg_popularity': diversity['avg_popularity'],
            'unique_items': diversity['unique_items'],
            'popular_ratio': popularity_bias['popular_ratio'],
            'memory_usage_mb': memory_used
        })


    results_df = pd.DataFrame(results)
    csv_file = 'bivae_hyperparameter_results.csv'
    results_df.to_csv(csv_file, index=False)
    print(f"\nResults saved to {csv_file}")

if __name__ == "__main__":
    main()

Loading MovieLens data...


100%|██████████| 4.81k/4.81k [00:00<00:00, 8.98kKB/s]


Data loaded: 100000 ratings
Train set size: 75000, Test set size: 25000
Number of users: 943
Number of items: 1642


Testing hyperparameters:   0%|          | 0/5 [00:00<?, ?it/s]


Training with epochs=50, batch_size=256


  0%|          | 0/50 [00:00<?, ?it/s]

Training time: 20.83 seconds
Prediction time: 2.02 seconds


Testing hyperparameters:  20%|██        | 1/5 [00:33<02:14, 33.57s/it]

MAP: 0.069509
NDCG: 0.231539
Precision@10: 0.197558
Recall@10: 0.137355
Recommendation Diversity:
  Average Popularity: 45.676005
  Unique Items: 1642
Popularity Bias:
  Popular Ratio: 0.199756 (Top 20.0% of items)

Training with epochs=50, batch_size=1024


  0%|          | 0/50 [00:00<?, ?it/s]

Training time: 11.72 seconds
Prediction time: 1.81 seconds


Testing hyperparameters:  40%|████      | 2/5 [00:57<01:23, 27.87s/it]

MAP: 0.096456
NDCG: 0.365407
Precision@10: 0.324735
Recall@10: 0.172576
Recommendation Diversity:
  Average Popularity: 45.676005
  Unique Items: 1642
Popularity Bias:
  Popular Ratio: 0.199756 (Top 20.0% of items)

Training with epochs=100, batch_size=256


  0%|          | 0/100 [00:00<?, ?it/s]

Training time: 25.79 seconds
Prediction time: 1.80 seconds


Testing hyperparameters:  60%|██████    | 3/5 [01:35<01:05, 32.59s/it]

MAP: 0.116804
NDCG: 0.409785
Precision@10: 0.357643
Recall@10: 0.193858
Recommendation Diversity:
  Average Popularity: 45.676005
  Unique Items: 1642
Popularity Bias:
  Popular Ratio: 0.199756 (Top 20.0% of items)

Training with epochs=100, batch_size=1024


  0%|          | 0/100 [00:00<?, ?it/s]

Training time: 24.15 seconds
Prediction time: 1.79 seconds


Testing hyperparameters:  80%|████████  | 4/5 [02:12<00:34, 34.16s/it]

MAP: 0.111018
NDCG: 0.400533
Precision@10: 0.349045
Recall@10: 0.184418
Recommendation Diversity:
  Average Popularity: 45.676005
  Unique Items: 1642
Popularity Bias:
  Popular Ratio: 0.199756 (Top 20.0% of items)

Training with epochs=500, batch_size=128


  0%|          | 0/500 [00:00<?, ?it/s]

Training time: 155.48 seconds
Prediction time: 1.78 seconds


Testing hyperparameters: 100%|██████████| 5/5 [05:00<00:00, 60.00s/it]

MAP: 0.131964
NDCG: 0.439352
Precision@10: 0.382166
Recall@10: 0.208614
Recommendation Diversity:
  Average Popularity: 45.676005
  Unique Items: 1642
Popularity Bias:
  Popular Ratio: 0.199756 (Top 20.0% of items)

Results saved to bivae_hyperparameter_results.csv





In [11]:
import os
import sys
import time
import numpy as np
import pandas as pd
import torch
import cornac
import psutil
import matplotlib.pyplot as plt
from collections import defaultdict
from tqdm.notebook import tqdm  # For progress bars
from google.colab import drive

# Install necessary packages
!pip install cornac recommenders psutil matplotlib tqdm

# Import necessary modules
from recommenders.datasets import movielens
from recommenders.datasets.python_splitters import python_stratified_split
from recommenders.models.cornac.cornac_utils import predict_ranking
from recommenders.utils.timer import Timer
from recommenders.utils.constants import SEED
from recommenders.evaluation.python_evaluation import (
    map,
    ndcg_at_k,
    precision_at_k,
    recall_at_k,
)

# Print versions for reference
print(f"System version: {sys.version}")
print(f"PyTorch version: {torch.__version__}")
print(f"Cornac version: {cornac.__version__}")

System version: 3.11.12 (main, Apr  9 2025, 08:55:54) [GCC 11.4.0]
PyTorch version: 2.6.0+cu124
Cornac version: 2.3.3


In [12]:
# Mount Google Drive to save results
drive.mount('/content/drive')

# Define constants
MOVIELENS_DATA_SIZE = '100k'
TOP_K = 10
LATENT_DIM = 50
ENCODER_DIMS = [100]
ACT_FUNC = "tanh"
LIKELIHOOD = "pois"
LEARNING_RATE = 0.005

# Set hyperparameters for the BiVAE model
EPOCHS = 50
BATCH_SIZE = 1024

# Define paths for saving results
RESULTS_DIR = '/content/drive/MyDrive/bivae_results1234'
os.makedirs(RESULTS_DIR, exist_ok=True)

def count_interactions(df, column_name):
    """Count interactions per user/item"""
    return df.groupby(column_name).size()

def create_interaction_buckets(counts, num_buckets=5):
    """Create buckets based on number of interactions"""
    # Get quantiles to create roughly equal-sized buckets
    quantiles = np.linspace(0, 1, num_buckets + 1)[1:-1]
    thresholds = [1] + [int(t) for t in np.quantile(counts, quantiles)] + [float('inf')]

    # Create buckets
    buckets = {}
    for i in range(len(thresholds) - 1):
        min_val = thresholds[i]
        max_val = thresholds[i+1]
        if min_val == max_val:
            bucket_name = f"{min_val}"
        else:
            bucket_name = f"{min_val}-{max_val-1}" if max_val != float('inf') else f"{min_val}+"

        buckets[bucket_name] = [id for id, count in counts.items()
                               if min_val <= count < max_val]

    print("Interaction buckets:")
    for bucket, ids in buckets.items():
        print(f"  {bucket}: {len(ids)} entities")

    return buckets

def evaluate_cold_start(model, train_df, test_df, buckets, entity_type, k=TOP_K):
    """Evaluate model performance on different cold-start buckets"""
    results = {}

    for bucket, ids in buckets.items():
        print(f"Evaluating {entity_type} bucket: {bucket}")

        # Filter test data based on bucket
        if entity_type == 'user':
            bucket_test = test_df[test_df['userID'].isin(ids)]
        else:  # item
            bucket_test = test_df[test_df['itemID'].isin(ids)]

        if len(bucket_test) == 0:
            print(f"  No test data for {entity_type} bucket {bucket}, skipping")
            continue

        # Generate predictions for this bucket
        with Timer() as pred_time:
            # Fixed: Don't pass users or items parameters to predict_ranking
            # Instead, filter the predictions afterward
            topk_scores = predict_ranking(model, train_df, usercol='userID', itemcol='itemID',
                                         remove_seen=True)

            # Filter predictions to only include relevant users/items for this bucket
            if entity_type == 'user':
                topk_scores = topk_scores[topk_scores['userID'].isin(ids)]
            else:  # item
                topk_scores = topk_scores[topk_scores['itemID'].isin(ids)]

        # Only keep predictions that match test data for the current bucket
        bucket_scores = pd.merge(bucket_test[['userID', 'itemID']], topk_scores,
                                on=['userID', 'itemID'], how='left')

        # Calculate metrics
        bucket_results = {}
        bucket_results['count'] = len(bucket_test)
        bucket_results['prediction_time'] = pred_time.interval
        bucket_results['map'] = map(bucket_test, bucket_scores, k=k)
        bucket_results['ndcg'] = ndcg_at_k(bucket_test, bucket_scores, k=k)
        bucket_results['precision'] = precision_at_k(bucket_test, bucket_scores, k=k)
        bucket_results['recall'] = recall_at_k(bucket_test, bucket_scores, k=k)

        results[bucket] = bucket_results
        print(f"  MAP: {bucket_results['map']:.4f}, NDCG: {bucket_results['ndcg']:.4f}")

    return results

def main():
    # Load data
    print("Loading MovieLens data...")
    df = movielens.load_pandas_df(
        size=MOVIELENS_DATA_SIZE,
        header=["userID", "itemID", "rating", "timestamp"]
    )
    print("MovieLens data loaded successfully")

    # Count interactions per user and item
    user_counts = count_interactions(df, 'userID')
    item_counts = count_interactions(df, 'itemID')

    print(f"User interaction statistics:")
    print(f"  Min: {user_counts.min()}, Max: {user_counts.max()}, Median: {user_counts.median():.1f}, Mean: {user_counts.mean():.1f}")
    print(f"Item interaction statistics:")
    print(f"  Min: {item_counts.min()}, Max: {item_counts.max()}, Median: {item_counts.median():.1f}, Mean: {item_counts.mean():.1f}")

    # Create buckets based on interaction counts
    user_buckets = create_interaction_buckets(user_counts)
    item_buckets = create_interaction_buckets(item_counts)

    # Split data into train and test
    train, test = python_stratified_split(df, ratio=0.75)
    print(f"Train set size: {len(train)}, Test set size: {len(test)}")

    # Create Cornac dataset
    train_set = cornac.data.Dataset.from_uir(train.itertuples(index=False), seed=SEED)
    print(f'Number of users: {train_set.num_users}')
    print(f'Number of items: {train_set.num_items}')

    # Train BiVAE model
    print(f"\n{'='*50}")
    print(f"Training BiVAE with epochs={EPOCHS}, batch_size={BATCH_SIZE}")
    print(f"{'='*50}")

    # Create model
    bivae = cornac.models.BiVAECF(
        k=LATENT_DIM,
        encoder_structure=ENCODER_DIMS,
        act_fn=ACT_FUNC,
        likelihood=LIKELIHOOD,
        n_epochs=EPOCHS,
        batch_size=BATCH_SIZE,
        learning_rate=LEARNING_RATE,
        seed=SEED,
        use_gpu=torch.cuda.is_available(),
        verbose=True
    )

    # Memory usage before training
    memory_before = psutil.Process(os.getpid()).memory_info().rss / 1024 / 1024  # in MB

    # Train model
    with Timer() as training_timer:
        bivae.fit(train_set)
    training_time = training_timer.interval
    print(f"Training time: {training_time:.2f} seconds")

    # Memory usage after training
    memory_after = psutil.Process(os.getpid()).memory_info().rss / 1024 / 1024  # in MB
    memory_used = memory_after - memory_before
    print(f"Memory used: {memory_used:.2f} MB")

    # Overall model evaluation
    print("\nEvaluating overall model performance...")
    with Timer() as pred_time:
        all_predictions = predict_ranking(bivae, train, usercol='userID', itemcol='itemID', remove_seen=True)

    eval_map = map(test, all_predictions, k=TOP_K)
    eval_ndcg = ndcg_at_k(test, all_predictions, k=TOP_K)
    eval_precision = precision_at_k(test, all_predictions, k=TOP_K)
    eval_recall = recall_at_k(test, all_predictions, k=TOP_K)

    print(f"Overall performance metrics:")
    print(f"  MAP: {eval_map:.4f}")
    print(f"  NDCG@{TOP_K}: {eval_ndcg:.4f}")
    print(f"  Precision@{TOP_K}: {eval_precision:.4f}")
    print(f"  Recall@{TOP_K}: {eval_recall:.4f}")
    print(f"  Prediction time: {pred_time.interval:.2f} seconds")

    # Perform cold-start analysis
    print("\nPerforming cold-start analysis...")

    # Cold-start user analysis
    print("\nCold-start USER analysis:")
    user_results = evaluate_cold_start(bivae, train, test, user_buckets, 'user', k=TOP_K)

    # Cold-start item analysis
    print("\nCold-start ITEM analysis:")
    item_results = evaluate_cold_start(bivae, train, test, item_buckets, 'item', k=TOP_K)

    # Convert results to DataFrames for easier analysis
    user_df = pd.DataFrame.from_dict(user_results, orient='index')
    item_df = pd.DataFrame.from_dict(item_results, orient='index')

    # Save results
    user_df.to_csv(f'{RESULTS_DIR}/bivae_cold_start_user_analysis.csv')
    item_df.to_csv(f'{RESULTS_DIR}/bivae_cold_start_item_analysis.csv')

    print(f"\nCold-start user analysis results:")
    print(user_df)
    print(f"\nCold-start item analysis results:")
    print(item_df)

    # Aggregate results for reporting
    print("\nCold-start Performance Summary:")
    print("User interaction level impact on performance:")
    for metric in ['map', 'ndcg', 'precision', 'recall']:
        print(f"  {metric.upper()}: {' | '.join([f'{bucket}: {row[metric]:.4f}' for bucket, row in user_df.iterrows()])}")

    print("\nItem interaction level impact on performance:")
    for metric in ['map', 'ndcg', 'precision', 'recall']:
        print(f"  {metric.upper()}: {' | '.join([f'{bucket}: {row[metric]:.4f}' for bucket, row in item_df.iterrows()])}")

    # Clean up
    del bivae
    import gc
    gc.collect()

    print("\nCold-start analysis completed and results saved.")

if __name__ == "__main__":
    main()

Mounted at /content/drive
Loading MovieLens data...


100%|██████████| 4.81k/4.81k [00:00<00:00, 7.63kKB/s]


MovieLens data loaded successfully
User interaction statistics:
  Min: 20, Max: 737, Median: 65.0, Mean: 106.0
Item interaction statistics:
  Min: 1, Max: 583, Median: 27.0, Mean: 59.5
Interaction buckets:
  1-28: 187 entities
  29-49: 188 entities
  50-92: 187 entities
  93-170: 190 entities
  171+: 191 entities
Interaction buckets:
  1-4: 333 entities
  5-15: 339 entities
  16-42: 332 entities
  43-99: 340 entities
  100+: 338 entities
Train set size: 74992, Test set size: 25008
Number of users: 943
Number of items: 1653

Training BiVAE with epochs=50, batch_size=1024


  0%|          | 0/50 [00:00<?, ?it/s]

Training time: 12.33 seconds
Memory used: 0.00 MB

Evaluating overall model performance...
Overall performance metrics:
  MAP: 0.0897
  NDCG@10: 0.3410
  Precision@10: 0.3058
  Recall@10: 0.1608
  Prediction time: 1.65 seconds

Performing cold-start analysis...

Cold-start USER analysis:
Evaluating user bucket: 1-28
  MAP: 1.0000, NDCG: 1.0000
Evaluating user bucket: 29-49
  MAP: 0.9614, NDCG: 1.0000
Evaluating user bucket: 50-92
  MAP: 0.6249, NDCG: 1.0000
Evaluating user bucket: 93-170
  MAP: 0.3227, NDCG: 1.0000
Evaluating user bucket: 171+
  MAP: 0.1610, NDCG: 1.0000

Cold-start ITEM analysis:
Evaluating item bucket: 1-4
  MAP: 0.9850, NDCG: 1.0000
Evaluating item bucket: 5-15
  MAP: 0.9926, NDCG: 1.0000
Evaluating item bucket: 16-42
  MAP: 0.9790, NDCG: 1.0000
Evaluating item bucket: 43-99
  MAP: 0.9162, NDCG: 1.0000
Evaluating item bucket: 100+
  MAP: 0.7119, NDCG: 1.0000

Cold-start user analysis results:
        count  prediction_time       map  ndcg  precision    recall
1-28  