In [7]:
import os
ROOT = os.path.join('..', '..')
import sys
sys.path.append(ROOT)
# 
import numpy as np
np.random.seed(42)

# from src.models import RankNetRecommender

In [8]:
from src.utils import read_ml, ml_train_test_split

ml_complete_df, ml_users_df, ml_ratings_df, ml_movies_df, ml_genres_df = read_ml()

MAX_K = 20
ml_ratings_train_df, ml_ratings_test_df = ml_train_test_split(ml_ratings_df=ml_ratings_df, min_user_test_samples=MAX_K)

In [9]:
import numpy as np
import pandas as pd
from sklearn.decomposition import NMF
from sklearn.preprocessing import LabelEncoder
from src.models.base import BaseRecommender
from src.models import RankNetRecommender
import torch

class CandidateGenerator:
    def __init__(self, n_components=20):
        self.n_components = n_components
        self.model = NMF(n_components=self.n_components, init='random', random_state=0)
        self.user_encoder = LabelEncoder()
        self.movie_encoder = LabelEncoder()

    def fit(self, ml_ratings_train_df):
        self.user_encoder.fit(ml_ratings_train_df['UserID'])
        self.movie_encoder.fit(ml_ratings_train_df['MovieID'])
        
        user_ids = self.user_encoder.transform(ml_ratings_train_df['UserID'])
        movie_ids = self.movie_encoder.transform(ml_ratings_train_df['MovieID'])
        ratings = ml_ratings_train_df['Rating'].values
        
        rating_matrix = np.zeros((len(self.user_encoder.classes_), len(self.movie_encoder.classes_)))
        rating_matrix[user_ids, movie_ids] = ratings
        
        self.model.fit(rating_matrix)
        self.user_features = self.model.transform(rating_matrix)
        self.movie_features = self.model.components_.T

    def generate_candidates(self, user_id, top_n=100):
        try:
            user_index = self.user_encoder.transform([user_id])[0]
            user_features = self.user_features[user_index]
        except ValueError:
            user_features = np.mean(self.user_features, axis=0)
        
        user_ratings = np.dot(user_features, self.movie_features.T)
        
        movie_indices = np.argpartition(user_ratings, -top_n)[-top_n:]
        movie_ids = self.movie_encoder.inverse_transform(movie_indices)
        
        return movie_ids, user_ratings[movie_indices]

class TwoStageRecommender(BaseRecommender):
    def __init__(self, ml_movies_df, ml_users_df, candidate_generator, ranker):
        super().__init__(ml_movies_df, ml_users_df)
        self.candidate_generator = candidate_generator
        self.ranker = ranker

    def fit(self, ml_ratings_train_df):
        self.candidate_generator.fit(ml_ratings_train_df)
        self.ranker.fit(ml_ratings_train_df)

    def predict(self, user_id, n_recommendations=10):
        candidates, candidate_scores = self.candidate_generator.generate_candidates(user_id, top_n=100)
        ranker_scores = self.ranker.predict(user_id, n_recommendations=len(candidates))
        
        ranker_scores = ranker_scores.reindex(candidates, fill_value=0)
        
        final_scores = 0.3 * candidate_scores + 0.7 * ranker_scores['Score'].values
        
        top_indices = np.argpartition(final_scores, -n_recommendations)[-n_recommendations:]
        top_candidates = candidates[top_indices]
        top_scores = final_scores[top_indices]
        
        return pd.DataFrame({
            'MovieID': top_candidates,
            'Score': top_scores
        }).set_index('MovieID')

# Usage example
candidate_generator = CandidateGenerator(n_components=50)
ranker = RankNetRecommender(ml_movies_df, ml_users_df, epochs=1)

recommender = TwoStageRecommender(ml_movies_df, ml_users_df, candidate_generator, ranker)
recommender.fit(ml_ratings_train_df)

metrics = recommender.evaluate(ml_ratings_test_df, k=10)
print("Predictive metrics")
print("Average Precision@K:", round(np.mean(metrics["precision@k"]), 4))
print("Average Recall@K:", round(np.mean(metrics["recall@k"]), 4))
print("Average F1@K:", round(np.mean(metrics["f1@k"]), 4))
print("Average NDCG:", round(np.mean(metrics["ndcg"]), 4))
print("Mean Reciprocal Rank:", round(np.mean(metrics["reciprocal_rank"]), 4))

Epoch 1/1: 100%|██████████| 121/121 [00:01<00:00, 68.17it/s]


Epoch 1/1, Loss: 0.6936


100%|██████████| 769/769 [01:16<00:00, 10.06it/s]

Predictive metrics
Average Precision@K: 0.0951
Average Recall@K: 0.011
Average F1@K: 0.0183
Average NDCG: 0.2534
Mean Reciprocal Rank: 0.7725





In [10]:
metrics = recommender.evaluate(ml_ratings_test_df, k=MAX_K)
print("Predictive metrics")
print("Average Precision@K:", round(np.mean(metrics["precision@k"]), 4))
print("Average Recall@K:", round(np.mean(metrics["recall@k"]), 4))
print("Average F1@K:", round(np.mean(metrics["f1@k"]), 4))
print("Average Average Precision:", round(np.mean(metrics["average_precision"]), 4))
print()
print("Best Precision@K:", round(np.max(metrics["precision@k"]), 4))
print("Best Recall@K:", round(np.max(metrics["recall@k"]), 4))
print("Best F1@K:", round(np.max(metrics["f1@k"]), 4))
print("Best Average Precision", round(np.max(metrics["average_precision"]), 4))
print()
print("Ranking metrics")
print("MRR:", round(np.mean(metrics["reciprocal_rank"]), 4))
print("Average NDCG:", round(np.mean(metrics["ndcg"]), 4))

100%|██████████| 769/769 [01:20<00:00,  9.49it/s]

Predictive metrics
Average Precision@K: 0.1071
Average Recall@K: 0.0243
Average F1@K: 0.035
Average Average Precision: 0.1127

Best Precision@K: 0.95
Best Recall@K: 0.25
Best F1@K: 0.2917
Best Average Precision 0.9753

Ranking metrics
MRR: 0.6105
Average NDCG: 0.311



