In [1]:
import json
import logging
import os
from datetime import datetime
from typing import Tuple, Callable, Dict, Optional, List

import numpy as np
import optuna
import pandas as pd
import scipy.sparse as sp
from sklearn.metrics import average_precision_score
from sklearn.model_selection import train_test_split
from tqdm import tqdm

from Evaluation.Evaluator import EvaluatorHoldout
from Recommenders.GraphBased.RP3betaRecommenderICM import RP3betaRecommenderICM
from Recommenders.SLIM.SLIMElasticNetRecommender import SLIMElasticNetRecommender
# NOTE(review): HybridRecommender is used further down but is never imported
# anywhere in this notebook; add its import here once its module path is confirmed.

ModuleNotFoundError: No module named 'Evaluation'

In [None]:
# Raw tables. Later cells read the columns user_id / item_id / data from URM
# and item_id / feature_id / data from ICM.
URM = pd.read_csv(filepath_or_buffer="./data/data_train.csv")
ICM = pd.read_csv(filepath_or_buffer="./data/data_ICM_metadata.csv")

In [None]:
# Summarize the user ID space found in the interaction table.
unique_users = URM.user_id.unique()
max_user_id = unique_users.max()
min_user_id = unique_users.min()
num_users = unique_users.size
print(f"Max User ID: {max_user_id}, Min User ID: {min_user_id}, Number of IDs: {num_users}")

# Summarize the item and feature ID spaces found in the item-metadata table.
# NOTE(review): items are counted from ICM only; this assumes every item that
# appears in URM also appears in ICM -- TODO confirm.
unique_items = ICM.item_id.unique()
max_item_id = ICM.item_id.max()
min_item_id = ICM.item_id.min()
num_items = unique_items.size
unique_features = ICM.feature_id.unique()
num_features = unique_features.size
min_features_id = ICM.feature_id.min()
max_features_id = ICM.feature_id.max()
print(f"Max Item ID: {max_item_id}, Min Item ID: {min_item_id}, Number of IDs: {num_items}")
# The label previously said "Min Item ID" although the value is the minimum feature ID.
print(f"Max Feature ID: {max_features_id}, Min Feature ID: {min_features_id}, Number of IDs: {num_features}")

# NOTE(review): num_users / num_items / num_features are passed as sparse-matrix
# dimensions in later cells, which only works when the IDs are zero-based and
# dense (max ID == count - 1). Check that against the min/max values printed above.

In [None]:
def dataset_splits(ratings, num_users, num_items, validation_percentage: float, testing_percentage: float,
                   seed: int = 1234):
    """Randomly split the interactions into train / validation / test sparse matrices.

    The split is done over interaction rows (not per user): rows are shuffled
    and partitioned with ``train_test_split``, first carving out the test set
    and then the validation set from what remains.

    Args:
        ratings: DataFrame exposing ``user_id``, ``item_id`` and ``data`` columns.
        num_users: Number of rows of every returned matrix.
        num_items: Number of columns of every returned matrix.
        validation_percentage: Fraction of ALL interactions used for validation.
        testing_percentage: Fraction of ALL interactions used for testing.
        seed: Random state used for both splits (default keeps the historical
            value 1234, so existing callers get the same partition).

    Returns:
        Tuple ``(urm_all, urm_train, urm_validation, urm_test)`` of
        ``scipy.sparse.csr_matrix`` objects, each of shape ``(num_users, num_items)``.

    Raises:
        ValueError: If either fraction is outside (0, 1) or if together they
            leave no data for training.
    """
    if not (0.0 < validation_percentage < 1.0 and 0.0 < testing_percentage < 1.0):
        raise ValueError("validation_percentage and testing_percentage must both be in (0, 1)")
    if validation_percentage + testing_percentage >= 1.0:
        raise ValueError("validation_percentage + testing_percentage must be < 1")

    def _to_csr(frame):
        # Build a (num_users x num_items) matrix from the rows of `frame`.
        return sp.csr_matrix((frame.data, (frame.user_id, frame.item_id)),
                             shape=(num_users, num_items))

    # Split row positions into train + validation and test sets
    train_val_indices, test_indices = train_test_split(
        np.arange(len(ratings)),
        test_size=testing_percentage,
        shuffle=True,
        random_state=seed
    )

    # The validation fraction is expressed relative to the whole dataset, so it
    # is rescaled to the size of the remaining train + validation pool.
    train_indices, val_indices = train_test_split(
        train_val_indices,
        test_size=validation_percentage / (1 - testing_percentage),
        shuffle=True,
        random_state=seed
    )

    urm_all = _to_csr(ratings)
    urm_train = _to_csr(ratings.iloc[train_indices])
    urm_validation = _to_csr(ratings.iloc[val_indices])
    urm_test = _to_csr(ratings.iloc[test_indices])

    return urm_all, urm_train, urm_validation, urm_test


In [None]:
# Hold out 20% of the interactions for testing and 10% for validation;
# the remainder is used for training.
urm_all, urm_train, urm_validation, urm_test = dataset_splits(
    URM,
    num_users=num_users,
    num_items=num_items,
    validation_percentage=0.10,
    testing_percentage=0.20,
)

In [2]:
# Item-content matrix: one row per item_id, one column per feature_id,
# values taken from the ICM `data` column.
icm_matrix = sp.csr_matrix(
    (ICM.data, (ICM.item_id, ICM.feature_id)),
    shape=(num_items, num_features),
)


NameError: name 'ICM' is not defined

# Optimization

In [None]:
def optimize_rp3_slim_hybrid(urm_train, icm, urm_validation, n_trials=30, output_folder="optimization_results"):
    """Tune an RP3beta(ICM) + SLIM ElasticNet hybrid with Optuna, maximizing Recall@10.

    Each trial samples hyperparameters for both base recommenders plus the two
    hybrid weights, trains both models on ``urm_train``, combines them in a
    ``HybridRecommender`` and scores the hybrid on ``urm_validation``.

    Args:
        urm_train: Training interaction matrix passed to the recommenders.
        icm: Item-content matrix passed to ``RP3betaRecommenderICM``.
        urm_validation: Interaction matrix used by ``EvaluatorHoldout`` (cutoff 10).
        n_trials: Number of Optuna trials to run.
        output_folder: Directory (created if missing) for the timestamped log
            file and the optional plot images.

    Returns:
        Tuple ``(best_params, best_recall)`` taken from the finished study.

    Raises:
        Exception: Whatever ``study.optimize`` or the best-result lookup raises
            (for example when no trial completed) is logged and re-raised.
    """
    os.makedirs(output_folder, exist_ok=True)
    log_file = os.path.join(output_folder, f'rp3_slim_optimization_{datetime.now().strftime("%Y%m%d_%H%M%S")}.log')

    # logging.basicConfig() silently does nothing once the root logger already
    # has handlers (e.g. on a second call), so the per-run log file would never
    # be attached. A dedicated logger guarantees file + console output per run.
    logger = logging.getLogger("rp3_slim_optimization")
    logger.setLevel(logging.INFO)
    logger.propagate = False  # avoid duplicate lines through root handlers
    formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
    handlers = [logging.FileHandler(log_file), logging.StreamHandler()]
    for handler in handlers:
        handler.setFormatter(formatter)
        logger.addHandler(handler)

    try:
        evaluator_validation = EvaluatorHoldout(urm_validation, cutoff_list=[10])  # Set cutoff to 10

        def objective(trial):
            """Train and evaluate one sampled configuration; return its Recall@10."""
            try:
                # RP3beta parameters
                rp3beta_params = {
                    "topK": trial.suggest_int("rp3_topK", 10, 200),
                    "alpha": trial.suggest_float("rp3_alpha", 0.1, 1.0),
                    "beta": trial.suggest_float("rp3_beta", 0.1, 1.0),
                    "delta": trial.suggest_float("rp3_delta", 0.0, 1.0),
                    "min_rating": trial.suggest_float("rp3_min_rating", 0.0, 5.0),
                    "implicit": trial.suggest_categorical("rp3_implicit", [True, False]),
                    "normalize_similarity": trial.suggest_categorical("rp3_normalize", [True, False])
                }

                # SLIM parameters
                slim_params = {
                    "l1_ratio": trial.suggest_float("slim_l1_ratio", 0.0, 1.0),
                    "alpha": trial.suggest_float("slim_alpha", 1e-4, 1e1, log=True),
                    "positive_only": trial.suggest_categorical("slim_positive_only", [True, False]),
                    "topK": trial.suggest_int("slim_topK", 10, 200),
                    # Single-choice categorical: keeps the fixed value in best_params
                    # so the final training step can read it back.
                    "do_feature_selection": trial.suggest_categorical("do_feature_selection", [True])
                }

                # Hybrid weights
                hybrid_params = {
                    "rp3beta_weight": trial.suggest_float("rp3beta_weight", 0.0, 1.0),
                    "slim_weight": trial.suggest_float("slim_weight", 0.0, 1.0)
                }

                # Train RP3beta
                rp3beta_recommender = RP3betaRecommenderICM(URM_train=urm_train, ICM=icm)
                rp3beta_recommender.fit(**rp3beta_params)

                # Train SLIM
                slim_recommender = SLIMElasticNetRecommender(URM_train=urm_train)
                slim_recommender.fit(**slim_params)

                # Combine into Hybrid
                hybrid_recommender = HybridRecommender(
                    URM_train=urm_train,
                    recommender_list=[rp3beta_recommender, slim_recommender],
                    recommender_weights=[hybrid_params["rp3beta_weight"], hybrid_params["slim_weight"]]
                )

                # Evaluate using Recall@10
                results_df, _ = evaluator_validation.evaluateRecommender(hybrid_recommender)
                recall_at_10 = results_df.loc[10]["RECALL"]

                logger.info(f"\nTrial {trial.number} results:")
                logger.info(f"Recall@10: {recall_at_10}")
                logger.info(f"RP3beta weight: {hybrid_recommender.recommender_weights[0]:.3f}")
                logger.info(f"SLIM weight: {hybrid_recommender.recommender_weights[1]:.3f}")

                return recall_at_10

            except Exception as e:
                # Best effort: a failing configuration is reported as pruned so the
                # study keeps going. The traceback is logged so real bugs (e.g. a
                # missing import) are not hidden behind a one-line message.
                logger.exception(f"Error in trial {trial.number}: {str(e)}")
                raise optuna.exceptions.TrialPruned() from e

        # Create Optuna study and optimize
        study = optuna.create_study(direction="maximize")

        try:
            study.optimize(objective, n_trials=n_trials)

            best_params = study.best_params
            best_recall = study.best_value

            logger.info("\nOptimization completed!")
            logger.info(f"Best Recall@10: {best_recall}")
            logger.info("Best parameters:")
            logger.info(json.dumps(best_params, indent=2))

            # Optional: save visualization plots. Any failure here must not
            # discard the optimization result computed above.
            try:
                from optuna.visualization import plot_optimization_history, plot_param_importances
                plot_optimization_history(study).write_image(os.path.join(output_folder, "optimization_history.png"))
                plot_param_importances(study).write_image(os.path.join(output_folder, "param_importances.png"))
            except ImportError:
                logger.warning("Plotly not installed. Skipping visualization generation.")
            except Exception as plot_error:
                logger.warning(f"Could not generate visualization plots: {plot_error}")

            return best_params, best_recall

        except Exception as e:
            logger.error(f"Optimization failed: {str(e)}")
            raise
    finally:
        # Detach and close the handlers so the log file is released and repeated
        # calls do not accumulate handlers (which would duplicate every line).
        for handler in handlers:
            logger.removeHandler(handler)
            handler.close()


In [None]:
# Short search (2 trials) over the RP3beta + SLIM hybrid, scored on the validation split.
best_params, best_recall = optimize_rp3_slim_hybrid(
    n_trials=2,
    urm_validation=urm_validation,
    icm=icm_matrix,
    urm_train=urm_train,
)

# Report the winning configuration.
print("Best Recall@10:", best_recall)
print("Best Parameters:", best_params)


In [None]:
from Recommenders.SLIM.SLIMElasticNetRecommender import SLIMElasticNetRecommender
from Recommenders.GraphBased.RP3betaRecommenderICM import RP3betaRecommenderICM

import logging

# Root logger setup: INFO level, timestamped "time - level - message" lines.
logging.basicConfig(format='%(asctime)s - %(levelname)s - %(message)s', level=logging.INFO)

def train_hybrid_with_best_params(urm_all, icm, best_params):
    """Train the RP3beta + SLIM hybrid on ``urm_all`` using tuned hyperparameters.

    Args:
        urm_all: Interaction matrix used to train both base recommenders and
            passed to the hybrid.
        icm: Item-content matrix passed to ``RP3betaRecommenderICM``.
        best_params: Mapping of tuned values, keyed by the parameter names used
            during optimization (``rp3_*``, ``slim_*``, ``do_feature_selection``,
            ``rp3beta_weight``, ``slim_weight``).

    Returns:
        The ``HybridRecommender`` built from the two trained recommenders.

    Raises:
        KeyError: If ``best_params`` lacks any of the expected keys.
    """
    # (fit keyword, key inside best_params) pairs for the RP3beta model.
    rp3_key_pairs = (
        ("topK", "rp3_topK"),
        ("alpha", "rp3_alpha"),
        ("beta", "rp3_beta"),
        ("delta", "rp3_delta"),
        ("min_rating", "rp3_min_rating"),
        ("implicit", "rp3_implicit"),
        ("normalize_similarity", "rp3_normalize"),
    )
    rp3_fit_kwargs = {fit_arg: best_params[stored_key] for fit_arg, stored_key in rp3_key_pairs}

    # Same mapping for the SLIM ElasticNet model.
    slim_key_pairs = (
        ("l1_ratio", "slim_l1_ratio"),
        ("alpha", "slim_alpha"),
        ("positive_only", "slim_positive_only"),
        ("topK", "slim_topK"),
        ("do_feature_selection", "do_feature_selection"),
    )
    slim_fit_kwargs = {fit_arg: best_params[stored_key] for fit_arg, stored_key in slim_key_pairs}

    # Weights are ordered to match the recommender list below: RP3beta first, SLIM second.
    weights = [best_params[weight_key] for weight_key in ("rp3beta_weight", "slim_weight")]

    logging.info("Training RP3beta Recommender...")
    rp3_model = RP3betaRecommenderICM(URM_train=urm_all, ICM=icm)
    rp3_model.fit(**rp3_fit_kwargs)
    logging.info("RP3beta Recommender trained successfully.")

    logging.info("Training SLIM ElasticNet Recommender...")
    slim_model = SLIMElasticNetRecommender(URM_train=urm_all)
    slim_model.fit(**slim_fit_kwargs)
    logging.info("SLIM ElasticNet Recommender trained successfully.")

    logging.info("Training Hybrid Recommender...")
    combined_model = HybridRecommender(
        URM_train=urm_all,
        recommender_list=[rp3_model, slim_model],
        recommender_weights=weights,
    )
    logging.info("Hybrid Recommender successfully trained.")

    return combined_model

import pickle

# Retrain on the full interaction matrix with the tuned hyperparameters.
hybrid_model = train_hybrid_with_best_params(urm_all=urm_all, icm=icm_matrix, best_params=best_params)

# Persist the trained hybrid to disk.
# NOTE: only unpickle this file from a trusted source; pickle.load can execute arbitrary code.
output_model_file = "hybrid_recommender.pkl"
with open(output_model_file, "wb") as file:
    pickle.dump(hybrid_model, file)
logging.info(f"Hybrid model saved to {output_model_file}.")
